From aa9be9ff1ff38b9827f095c18dcdc626ecc88359 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 13:32:00 +1000 Subject: [PATCH 001/135] feat(katana.config): Created file katana.config --- conf/katana.config | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 conf/katana.config diff --git a/conf/katana.config b/conf/katana.config new file mode 100644 index 00000000..e69de29b From 678b21239ec69740953157f34cec2820a5ea1ff0 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 13:52:10 +1000 Subject: [PATCH 002/135] feat(katana.config): Added params for PBS queues --- conf/base.config | 4 +++ conf/katana.config | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/conf/base.config b/conf/base.config index 37479a30..fecac7f8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -7,6 +7,10 @@ the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. ---------------------------------------------------------------------------------------- */ +params { + cpuQueue = 'submission' + gpuQueue = 'mwacgpu2' +} process { diff --git a/conf/katana.config b/conf/katana.config index e69de29b..e9b41fad 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -0,0 +1,71 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
+---------------------------------------------------------------------------------------- +*/ +params { + cpuQueue = 'submission' + gpuQueue = 'mwacgpu2' +} + +process { + + // TODO nf-core: Check the defaults for all processes + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + executor = 'pbspro' + + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + 
errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } +} From 82466e0c25c014b730cc4a1047877d365f95a13c Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 13:52:40 +1000 Subject: [PATCH 003/135] feat(katana.config): Added executor parameter to allow the use Katana scheduler --- conf/base.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/base.config b/conf/base.config index fecac7f8..e9b41fad 100644 --- a/conf/base.config +++ b/conf/base.config @@ -19,6 +19,8 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } + executor = 'pbspro' + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' From 8d2a771b18222e36633b2038501ae52edae39d53 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 14:00:30 +1000 Subject: [PATCH 004/135] feat(katana.config): Added label configs for pushing to GPU partition --- conf/katana.config | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf/katana.config b/conf/katana.config index e9b41fad..a344ffc0 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -68,4 +68,13 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withLabel:gpu_compute { + queue = "${params.gpuQueue}" + accelerator = 1 + clusterOptions = { "-l select=1:ngpus=1:ncpus=${task.cpus}:mem=${task.memory.toMega()}mb" } + + containerOptions = { + workflow.containerEngine == "singularity" ? '--nv' : ( workflow.containerEngine == "docker" ? 
'--gpus all' : none ) + } + } } From 3c1fb28adbeac9695c3b300d0e737cc0265f5cd1 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 15:39:28 +1000 Subject: [PATCH 005/135] feat(run_alphafold2): Added 'gpu_compute' label to the Alphafold process --- modules/local/run_alphafold2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 559ad4e5..4d31b241 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -3,7 +3,7 @@ */ process RUN_ALPHAFOLD2 { tag "$meta.id" - label 'process_medium' + label 'process_medium', 'gpu_compute' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' : From e8d2abb9bdb9e00b58006f68a9713352163c269f Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 17:13:48 +1000 Subject: [PATCH 006/135] feat(run_alphafold2_pred): Added 'gpu_compute' label --- modules/local/run_alphafold2_pred.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 43143b9d..7df9578d 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -3,7 +3,7 @@ */ process RUN_ALPHAFOLD2_PRED { tag "$meta.id" - label 'process_medium' + label 'process_medium', 'gpu_compute' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : From 13dd1cb6058e112743e8fa99fbca0150fc2dddcb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Mon, 29 Jul 2024 17:16:27 +1000 Subject: [PATCH 007/135] revert(run_alphafold2.nf): Removed GPU compute label from pipeline --- modules/local/run_alphafold2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 4d31b241..559ad4e5 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -3,7 +3,7 @@ */ process RUN_ALPHAFOLD2 { tag "$meta.id" - label 'process_medium', 'gpu_compute' + label 'process_medium' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' : From 24443951f8a8c05e664a8166c8b66f6cc4d08a65 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 29 Jul 2024 17:49:01 +1000 Subject: [PATCH 008/135] Updated database links modified: conf/dbs.config modified: modules/local/run_alphafold2.nf Added variable links in dbs.config and run_alphafold2.nf --- conf/dbs.config | 11 + modules/local/run_alphafold2.nf | 19 +- .../execution_trace_2024-07-29_14-54-51.txt | 1 + .../execution_trace_2024-07-29_14-56-42.txt | 1 + .../execution_trace_2024-07-29_15-01-40.txt | 1 + .../execution_trace_2024-07-29_15-03-42.txt | 1 + pf_files/1L2Y.fasta | 3 + pf_files/proteinfold_run.sh | 9 + pf_files/proteinfold_run_k095.pbs | 16 + pf_files/proteinfold_test.pbs | 15 + pf_files/proteinfold_test.sh | 13 + pf_files/proteinfold_test_cpu.sh | 9 + pf_files/proteinfold_test_k095.pbs | 16 + pf_files/proteinfold_testfile.pbs | 14 + pf_files/proteinfold_testfile.sh | 9 + pf_files/samplesheet.csv | 2 + pf_files/samplesheetold.csv | 3 + .../execution_report_2024-07-29_15-13-50.html | 1041 ++++++++++++++++ .../execution_report_2024-07-29_15-18-14.html | 1041 ++++++++++++++++ .../execution_report_2024-07-29_16-33-39.html | 1082 
+++++++++++++++++ .../execution_report_2024-07-29_17-07-25.html | 1082 +++++++++++++++++ ...xecution_timeline_2024-07-29_15-13-50.html | 222 ++++ ...xecution_timeline_2024-07-29_15-18-14.html | 223 ++++ ...xecution_timeline_2024-07-29_16-33-39.html | 223 ++++ ...xecution_timeline_2024-07-29_17-07-25.html | 223 ++++ .../execution_trace_2024-07-29_15-13-50.txt | 1 + .../execution_trace_2024-07-29_15-18-14.txt | 3 + .../execution_trace_2024-07-29_16-27-42.txt | 2 + .../execution_trace_2024-07-29_16-33-39.txt | 3 + .../execution_trace_2024-07-29_17-07-25.txt | 3 + .../execution_trace_2024-07-29_17-32-53.txt | 1 + .../execution_trace_2024-07-29_17-36-09.txt | 1 + .../pipeline_dag_2024-07-29_15-13-50.html | 243 ++++ .../pipeline_dag_2024-07-29_15-18-14.html | 243 ++++ .../pipeline_info/samplesheet.valid.csv | 2 + 35 files changed, 5772 insertions(+), 10 deletions(-) create mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt create mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt create mode 100644 null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt create mode 100644 null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt create mode 100644 pf_files/1L2Y.fasta create mode 100644 pf_files/proteinfold_run.sh create mode 100644 pf_files/proteinfold_run_k095.pbs create mode 100644 pf_files/proteinfold_test.pbs create mode 100755 pf_files/proteinfold_test.sh create mode 100644 pf_files/proteinfold_test_cpu.sh create mode 100644 pf_files/proteinfold_test_k095.pbs create mode 100644 pf_files/proteinfold_testfile.pbs create mode 100644 pf_files/proteinfold_testfile.sh create mode 100644 pf_files/samplesheet.csv create mode 100644 pf_files/samplesheetold.csv create mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html create mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html create mode 100644 
pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html create mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html create mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html create mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html create mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html create mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt create mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt create mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html create mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html create mode 100644 pf_files/test_out/pipeline_info/samplesheet.valid.csv diff --git a/conf/dbs.config b/conf/dbs.config index e186f9c0..c3617f49 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -34,6 +34,17 @@ params { pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" uniprot_path = "${params.alphafold2_db}/uniprot/*" + // Alphafold variables + bfd_variable = "${params.alphafold2_db}/bfd/" + small_bfd_variable = "${params.alphafold2_db}/smallbfd/" + mgnify_variable = "${params.alphafold2_db}/mgnify/" + pdb70_variable = "${params.alphafold2_db}/pdb70/" + pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" + 
uniclust30_variable = "${params.alphafold2_db}/uniclust30/" + uniref90_variable = "${params.alphafold2_db}/uniref90/" + pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" + uniprot_variable = "${params.alphafold2_db}/uniprot/" + // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' uniref30 = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 559ad4e5..731ad1c1 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -34,17 +34,17 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08/uniclust30_2018_08" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " } """ - if [ -f pdb_seqres/pdb_seqres.txt ] - then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt + if [ -f ${params.pdb_seqres_variable}pdb_seqres.txt ] + then sed -i "/^\\w*0/d" ${params.pdb_seqres_variable}pdb_seqres.txt fi if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi python3 /app/alphafold/run_alphafold.py \ @@ -53,10 +53,9 @@ process RUN_ALPHAFOLD2 { --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ + --template_mmcif_dir=${params.pdb_mmcif_variable}mmcif_files \ + --obsolete_pdbs_path=${params.pdb_mmcif_variable}obsolete.dat \ --random_seed=53343 \ $args diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt b/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ 
b/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt b/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt b/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt b/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/1L2Y.fasta b/pf_files/1L2Y.fasta new file mode 100644 index 00000000..9558f5e0 --- /dev/null +++ b/pf_files/1L2Y.fasta @@ -0,0 +1,3 @@ +>1L2Y_1|Chain A|TC5b|null +NLYIQWLKDGGPSSGRPPPS + diff --git a/pf_files/proteinfold_run.sh b/pf_files/proteinfold_run.sh new file mode 100644 index 00000000..e1391d7f --- /dev/null +++ b/pf_files/proteinfold_run.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu false \ + -profile singularity diff --git a/pf_files/proteinfold_run_k095.pbs 
b/pf_files/proteinfold_run_k095.pbs new file mode 100644 index 00000000..0eccb7b2 --- /dev/null +++ b/pf_files/proteinfold_run_k095.pbs @@ -0,0 +1,16 @@ +#!/bin/bash + +#PBS -l host=k095 +#PBS -l select=1:ncpus=8:mem=40gb +#PBS -l ngpus=1 +#PBS -l walltime=0:30:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_run.sh diff --git a/pf_files/proteinfold_test.pbs b/pf_files/proteinfold_test.pbs new file mode 100644 index 00000000..47cba76b --- /dev/null +++ b/pf_files/proteinfold_test.pbs @@ -0,0 +1,15 @@ +#!/bin/bash + +#PBS -l select=1:ncpus=2:mem=6gb +#PBS -l walltime=6:00:00 +#PBS -j oe + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_test.sh diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh new file mode 100755 index 00000000..040ff685 --- /dev/null +++ b/pf_files/proteinfold_test.sh @@ -0,0 +1,13 @@ +module load nextflow/23.04.4 java/11 +export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache + +nextflow run /srv/scratch/z5378336/proteinfold/main.nf \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu true \ + -profile singularity diff --git a/pf_files/proteinfold_test_cpu.sh b/pf_files/proteinfold_test_cpu.sh new file mode 100644 index 00000000..c2bf2251 --- /dev/null +++ b/pf_files/proteinfold_test_cpu.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + 
--full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu false \ + -profile test diff --git a/pf_files/proteinfold_test_k095.pbs b/pf_files/proteinfold_test_k095.pbs new file mode 100644 index 00000000..627827f4 --- /dev/null +++ b/pf_files/proteinfold_test_k095.pbs @@ -0,0 +1,16 @@ +#!/bin/bash + +#PBS -l host=k095 +#PBS -l select=1:ncpus=8:mem=20gb +#PBS -l ngpus=1 +#PBS -l walltime=0:30:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_test.sh diff --git a/pf_files/proteinfold_testfile.pbs b/pf_files/proteinfold_testfile.pbs new file mode 100644 index 00000000..d7013c16 --- /dev/null +++ b/pf_files/proteinfold_testfile.pbs @@ -0,0 +1,14 @@ +#!/bin/bash + +#PBS -l select=1:ncpus=2:mem=6gb +#PBS -l walltime=6:00:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_testfile.sh diff --git a/pf_files/proteinfold_testfile.sh b/pf_files/proteinfold_testfile.sh new file mode 100644 index 00000000..1d0879f2 --- /dev/null +++ b/pf_files/proteinfold_testfile.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu true \ + -profile test diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv new file mode 100644 index 00000000..879ede04 --- /dev/null +++ b/pf_files/samplesheet.csv @@ -0,0 +1,2 @@ +sequence,fasta +1L2Y,./1L2Y.fasta diff --git a/pf_files/samplesheetold.csv b/pf_files/samplesheetold.csv new file mode 100644 index 00000000..467fdcf0 --- /dev/null +++ 
b/pf_files/samplesheetold.csv @@ -0,0 +1,3 @@ +sequence,fasta +T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta +T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html new file mode 100644 index 00000000..ebddc9f8 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html @@ -0,0 +1,1041 @@ + + + + + + + + + + + [elegant_rutherford] Nextflow Workflow Report + + + + + + + +
+
+ +

Nextflow workflow report

+

[elegant_rutherford]

+ + +
+

Workflow execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: null.

+

The full error message was:

+
SIGINT
+
+ + +
+
Run times
+
+ 29-Jul-2024 15:13:51 - 29-Jul-2024 15:14:17 + (duration: 25.9s) +
+ +
+
+
  0 succeeded  
+
  0 cached  
+
  0 ignored  
+
  0 failed  
+
+
+ +
Nextflow command
+
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
+
+ +
+
CPU-Hours
+
(a few seconds)
+ +
Launch directory
+
/srv/scratch/z5378336/proteinfold/pf_files
+ +
Work directory
+
/srv/scratch/z5378336/proteinfold/pf_files/work
+ +
Project directory
+
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
+ + +
Script name
+
main.nf
+ + + +
Script ID
+
9c5b06fd002e694ba4b6c4766cd2546f
+ + +
Workflow session
+
3501f224-9f68-4d2a-a50f-2206dd92e59a
+ + + +
Workflow profile
+
singularity
+ + + +
Nextflow version
+
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
+
+
+
+ +
+

Resource Usage

+

These plots give an overview of the distribution of resource usage for each process.

+ +

CPU

+ +
+
+
+
+
+
+
+ +
+ +

Memory

+ +
+
+
+
+
+
+
+
+
+
+
+ +

Job Duration

+ +
+
+
+
+
+
+
+
+ +

I/O

+ +
+
+
+
+
+
+
+
+
+ +
+
+

Tasks

+

This table shows information about each task in the workflow. Use the search box on the right + to filter rows for specific values. Clicking headers will sort the table by that value and + scrolling side to side will reveal more columns.

+
+ + +
+
+
+
+
+ +
+ (tasks table omitted because the dataset is too big) +
+
+ +
+
+ Generated by Nextflow, version 23.04.4 +
+
+ + + + + diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html new file mode 100644 index 00000000..27d7bf18 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html @@ -0,0 +1,1041 @@ + + + + + + + + + + + [sad_lagrange] Nextflow Workflow Report + + + + + + + +
+
+ +

Nextflow workflow report

+

[sad_lagrange]

+ + +
+

Workflow execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: null.

+

The full error message was:

+
SIGINT
+
+ + +
+
Run times
+
+ 29-Jul-2024 15:18:15 - 29-Jul-2024 17:05:50 + (duration: 1h 47m 35s) +
+ +
+
+
  1 succeeded  
+
  0 cached  
+
  0 ignored  
+
  0 failed  
+
+
+ +
Nextflow command
+
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
+
+ +
+
CPU-Hours
+
(a few seconds)
+ +
Launch directory
+
/srv/scratch/z5378336/proteinfold/pf_files
+ +
Work directory
+
/srv/scratch/z5378336/proteinfold/pf_files/work
+ +
Project directory
+
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
+ + +
Script name
+
main.nf
+ + + +
Script ID
+
9c5b06fd002e694ba4b6c4766cd2546f
+ + +
Workflow session
+
2ca0a1d9-b74f-4246-a767-8ec2191b7ef1
+ + + +
Workflow profile
+
singularity
+ + + +
Nextflow version
+
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
+
+
+
+ +
+

Resource Usage

+

These plots give an overview of the distribution of resource usage for each process.

+ +

CPU

+ +
+
+
+
+
+
+
+ +
+ +

Memory

+ +
+
+
+
+
+
+
+
+
+
+
+ +

Job Duration

+ +
+
+
+
+
+
+
+
+ +

I/O

+ +
+
+
+
+
+
+
+
+
+ +
+
+

Tasks

+

This table shows information about each task in the workflow. Use the search box on the right + to filter rows for specific values. Clicking headers will sort the table by that value and + scrolling side to side will reveal more columns.

+
+ + +
+
+
+
+
+ +
+ (tasks table omitted because the dataset is too big) +
+
+ +
+
+ Generated by Nextflow, version 23.04.4 +
+
+ + + + + diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html new file mode 100644 index 00000000..6d917080 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html @@ -0,0 +1,1082 @@ + + + + + + + + + + + [amazing_solvay] Nextflow Workflow Report + + + + + + + +
+
+ +

Nextflow workflow report

+

[amazing_solvay]

+ + +
+

Workflow execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: 1.

+

The full error message was:

+
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
+
+Caused by:
+  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
+
+Command executed:
+
+  if [ -f nullpdb_seqres.txt ]
+      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres//pdb_seqres.txt
+  fi
+  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
+  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70_from_mmcif_200916/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/mmcif_files         --obsolete_pdbs_path=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
+  
+  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
+  cd "1L2Y.1"
+  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
+  for i in 1 2 3 4
+      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
+  done
+  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
+  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
+  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
+  cd ..
+  
+  cat <<-END_VERSIONS > versions.yml
+  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
+      python: $(python3 --version | sed 's/Python //g')
+  END_VERSIONS
+
+Command exit status:
+  1
+
+Command output:
+  (empty)
+
+Command error:
+  .command.run: line 299: ${params.bfd_path}1: bad substitution
+
+Work dir:
+  /srv/scratch/z5378336/proteinfold/pf_files/work/bf/c7dbf3dcf0faf201228be86b11efc6
+
+Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh`
+
+ + +
+
Run times
+
+ 29-Jul-2024 16:33:41 - 29-Jul-2024 17:02:10 + (duration: 28m 29s) +
+ +
+
+
  1 succeeded  
+
  0 cached  
+
  0 ignored  
+
  1 failed  
+
+
+ +
Nextflow command
+
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
+
+ +
+
CPU-Hours
+
(a few seconds)
+ +
Launch directory
+
/srv/scratch/z5378336/proteinfold/pf_files
+ +
Work directory
+
/srv/scratch/z5378336/proteinfold/pf_files/work
+ +
Project directory
+
/srv/scratch/z5378336/proteinfold
+ + +
Script name
+
main.nf
+ + + +
Script ID
+
9c5b06fd002e694ba4b6c4766cd2546f
+ + +
Workflow session
+
b21e2d4c-1b8a-4340-adcd-c171b2ca9a9d
+ + + +
Workflow profile
+
singularity
+ + + +
Nextflow version
+
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
+
+
+
+ +
+

Resource Usage

+

These plots give an overview of the distribution of resource usage for each process.

+ +

CPU

+ +
+
+
+
+
+
+
+ +
+ +

Memory

+ +
+
+
+
+
+
+
+
+
+
+
+ +

Job Duration

+ +
+
+
+
+
+
+
+
+ +

I/O

+ +
+
+
+
+
+
+
+
+
+ +
+
+

Tasks

+

This table shows information about each task in the workflow. Use the search box on the right + to filter rows for specific values. Clicking headers will sort the table by that value and + scrolling side to side will reveal more columns.

+
+ + +
+
+
+
+
+ +
+ (tasks table omitted because the dataset is too big) +
+
+ +
+
+ Generated by Nextflow, version 23.04.4 +
+
+ + + + + diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html new file mode 100644 index 00000000..1c68a93d --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html @@ -0,0 +1,1082 @@ + + + + + + + + + + + [silly_austin] Nextflow Workflow Report + + + + + + + +
+
+ +

Nextflow workflow report

+

[silly_austin]

+ + +
+

Workflow execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: 1.

+

The full error message was:

+
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
+
+Caused by:
+  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
+
+Command executed:
+
+  if [ -f /data/bio/alphafold/pdb_seqres/pdb_seqres.txt ]
+      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres/pdb_seqres.txt
+  fi
+  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
+  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=/data/bio/alphafold/pdb_mmcif/mmcif_files         --obsolete_pdbs_path=/data/bio/alphafold/pdb_mmcif/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
+  
+  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
+  cd "1L2Y.1"
+  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
+  for i in 1 2 3 4
+      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
+  done
+  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
+  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
+  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
+  cd ..
+  
+  cat <<-END_VERSIONS > versions.yml
+  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
+      python: $(python3 --version | sed 's/Python //g')
+  END_VERSIONS
+
+Command exit status:
+  1
+
+Command output:
+  (empty)
+
+Command error:
+  .command.run: line 299: ${params.bfd_path}1: bad substitution
+
+Work dir:
+  /srv/scratch/z5378336/proteinfold/pf_files/work/eb/81fd9dfc117fc74a65cc890353460c
+
+Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
+
+ + +
+
Run times
+
+ 29-Jul-2024 17:07:27 - 29-Jul-2024 17:26:59 + (duration: 19m 32s) +
+ +
+
+
  1 succeeded  
+
  0 cached  
+
  0 ignored  
+
  1 failed  
+
+
+ +
Nextflow command
+
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
+
+ +
+
CPU-Hours
+
(a few seconds)
+ +
Launch directory
+
/srv/scratch/z5378336/proteinfold/pf_files
+ +
Work directory
+
/srv/scratch/z5378336/proteinfold/pf_files/work
+ +
Project directory
+
/srv/scratch/z5378336/proteinfold
+ + +
Script name
+
main.nf
+ + + +
Script ID
+
9c5b06fd002e694ba4b6c4766cd2546f
+ + +
Workflow session
+
4288de28-063c-4b0a-82c8-c6d0df5a3c69
+ + + +
Workflow profile
+
singularity
+ + + +
Nextflow version
+
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
+
+
+
+ +
+

Resource Usage

+

These plots give an overview of the distribution of resource usage for each process.

+ +

CPU

+ +
+
+
+
+
+
+
+ +
+ +

Memory

+ +
+
+
+
+
+
+
+
+
+
+
+ +

Job Duration

+ +
+
+
+
+
+
+
+
+ +

I/O

+ +
+
+
+
+
+
+
+
+
+ +
+
+

Tasks

+

This table shows information about each task in the workflow. Use the search box on the right + to filter rows for specific values. Clicking headers will sort the table by that value and + scrolling side to side will reveal more columns.

+
+ + +
+
+
+
+
+ +
+ (tasks table omitted because the dataset is too big) +
+
+ +
+
+ Generated by Nextflow, version 23.04.4 +
+
+ + + + + diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html new file mode 100644 index 00000000..c87bc8eb --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html @@ -0,0 +1,222 @@ + + + + + + + + + + + + +
+

Processes execution timeline

+

+ Launch time:
+ Elapsed time:
+ Legend: job wall time / memory usage (RAM) +

+
+
+ + + + + + + diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html new file mode 100644 index 00000000..5e552e09 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html @@ -0,0 +1,223 @@ + + + + + + + + + + + + +
+

Processes execution timeline

+

+ Launch time:
+ Elapsed time:
+ Legend: job wall time / memory usage (RAM) +

+
+
+ + + + + + + diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html new file mode 100644 index 00000000..1375ff51 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html @@ -0,0 +1,223 @@ + + + + + + + + + + + + +
+

Processes execution timeline

+

+ Launch time:
+ Elapsed time:
+ Legend: job wall time / memory usage (RAM) +

+
+
+ + + + + + + diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html new file mode 100644 index 00000000..fb094096 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html @@ -0,0 +1,223 @@ + + + + + + + + + + + + +
+

Processes execution timeline

+

+ Launch time:
+ Elapsed time:
+ Legend: job wall time / memory usage (RAM) +

+
+
+ + + + + + + diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt new file mode 100644 index 00000000..4a4ef576 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt @@ -0,0 +1,3 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +1 1d/d00ae9 1861667 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 15:18:29.085 2.9s 1s 18.0% 2.9 MB 22.1 MB 1.4 MB 641 B +2 79/773162 1862273 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (T1024_T1) ABORTED - 2024-07-29 15:30:06.452 - - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt new file mode 100644 index 00000000..82e28f17 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt @@ -0,0 +1,2 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +1 e2/28e478 1443250 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:29:24.063 8.7s 1s 6.1% 15.2 MB 38.1 MB 1.4 MB 455 B diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt new file mode 100644 index 00000000..2b541591 --- /dev/null +++ 
b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt @@ -0,0 +1,3 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +1 40/92b88f 1444153 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:35:11.644 7.7s 1s 8.8% 4.3 MB 29.8 MB 1.4 MB 444 B +2 bf/c7dbf3 1445514 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:02:06.143 3.1s 2.6s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt new file mode 100644 index 00000000..2a993552 --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt @@ -0,0 +1,3 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar +1 96/58a04d 1445890 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 17:09:01.045 6.6s 1s 13.3% 3.3 MB 26.8 MB 1.4 MB 445 B +2 eb/81fd9d 1446739 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:26:53.354 3.9s 3.2s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration 
realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html new file mode 100644 index 00000000..4e8d1e4b --- /dev/null +++ b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html @@ -0,0 +1,243 @@ + + + + + + Nextflow Cytoscape.js with Dagre + + + + + + + + + + + +

Nextflow Cytoscape.js with Dagre

+
+ + + diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html new file mode 100644 index 00000000..4e8d1e4b --- /dev/null +++ b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html @@ -0,0 +1,243 @@ + + + + + + Nextflow Cytoscape.js with Dagre + + + + + + + + + + + +

Nextflow Cytoscape.js with Dagre

+
+ + + diff --git a/pf_files/test_out/pipeline_info/samplesheet.valid.csv b/pf_files/test_out/pipeline_info/samplesheet.valid.csv new file mode 100644 index 00000000..570c3304 --- /dev/null +++ b/pf_files/test_out/pipeline_info/samplesheet.valid.csv @@ -0,0 +1,2 @@ +sequence,fasta +1L2Y_T1,./1L2Y.fasta From 8b9452ae6683558d2c3f01c077538bf53d14bb4a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 16:33:52 +1000 Subject: [PATCH 009/135] feat(pf_files): Added testing files --- pf_files/proteinfold_run.sh | 9 +++++++++ pf_files/proteinfold_run_k095.pbs | 16 ++++++++++++++++ pf_files/proteinfold_test.pbs | 15 +++++++++++++++ pf_files/proteinfold_test.sh | 10 ++++++++++ pf_files/proteinfold_test_cpu.sh | 9 +++++++++ pf_files/proteinfold_test_k095.pbs | 16 ++++++++++++++++ pf_files/proteinfold_testfile.pbs | 14 ++++++++++++++ pf_files/proteinfold_testfile.sh | 9 +++++++++ pf_files/samplesheet.csv | 3 +++ 9 files changed, 101 insertions(+) create mode 100644 pf_files/proteinfold_run.sh create mode 100644 pf_files/proteinfold_run_k095.pbs create mode 100644 pf_files/proteinfold_test.pbs create mode 100755 pf_files/proteinfold_test.sh create mode 100644 pf_files/proteinfold_test_cpu.sh create mode 100644 pf_files/proteinfold_test_k095.pbs create mode 100644 pf_files/proteinfold_testfile.pbs create mode 100644 pf_files/proteinfold_testfile.sh create mode 100644 pf_files/samplesheet.csv diff --git a/pf_files/proteinfold_run.sh b/pf_files/proteinfold_run.sh new file mode 100644 index 00000000..e1391d7f --- /dev/null +++ b/pf_files/proteinfold_run.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu false \ + -profile singularity diff --git a/pf_files/proteinfold_run_k095.pbs b/pf_files/proteinfold_run_k095.pbs new file mode 100644 index 00000000..0eccb7b2 --- /dev/null +++ 
b/pf_files/proteinfold_run_k095.pbs @@ -0,0 +1,16 @@ +#!/bin/bash + +#PBS -l host=k095 +#PBS -l select=1:ncpus=8:mem=40gb +#PBS -l ngpus=1 +#PBS -l walltime=0:30:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_run.sh diff --git a/pf_files/proteinfold_test.pbs b/pf_files/proteinfold_test.pbs new file mode 100644 index 00000000..47cba76b --- /dev/null +++ b/pf_files/proteinfold_test.pbs @@ -0,0 +1,15 @@ +#!/bin/bash + +#PBS -l select=1:ncpus=2:mem=6gb +#PBS -l walltime=6:00:00 +#PBS -j oe + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_test.sh diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh new file mode 100755 index 00000000..4eef87e2 --- /dev/null +++ b/pf_files/proteinfold_test.sh @@ -0,0 +1,10 @@ +module load nextflow/23.04.4 java/11 +nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu true \ + -profile singularity diff --git a/pf_files/proteinfold_test_cpu.sh b/pf_files/proteinfold_test_cpu.sh new file mode 100644 index 00000000..c2bf2251 --- /dev/null +++ b/pf_files/proteinfold_test_cpu.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu false \ + -profile test diff --git a/pf_files/proteinfold_test_k095.pbs b/pf_files/proteinfold_test_k095.pbs new file mode 100644 index 00000000..627827f4 
--- /dev/null +++ b/pf_files/proteinfold_test_k095.pbs @@ -0,0 +1,16 @@ +#!/bin/bash + +#PBS -l host=k095 +#PBS -l select=1:ncpus=8:mem=20gb +#PBS -l ngpus=1 +#PBS -l walltime=0:30:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_test.sh diff --git a/pf_files/proteinfold_testfile.pbs b/pf_files/proteinfold_testfile.pbs new file mode 100644 index 00000000..d7013c16 --- /dev/null +++ b/pf_files/proteinfold_testfile.pbs @@ -0,0 +1,14 @@ +#!/bin/bash + +#PBS -l select=1:ncpus=2:mem=6gb +#PBS -l walltime=6:00:00 + +export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir + +module load nextflow/23.04.4 +module load java/11.0.17_8-openjdk + +cd $PBS_O_WORKDIR + +bash proteinfold_testfile.sh diff --git a/pf_files/proteinfold_testfile.sh b/pf_files/proteinfold_testfile.sh new file mode 100644 index 00000000..1d0879f2 --- /dev/null +++ b/pf_files/proteinfold_testfile.sh @@ -0,0 +1,9 @@ +nextflow run nf-core/proteinfold -r 1.1.0 \ + --input samplesheet.csv \ + --outdir test_out \ + --mode alphafold2 \ + --alphafold2_db /data/bio/alphafold \ + --full_dbs true \ + --alphafold2_model_preset monomer \ + --use_gpu true \ + -profile test diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv new file mode 100644 index 00000000..467fdcf0 --- /dev/null +++ b/pf_files/samplesheet.csv @@ -0,0 +1,3 @@ +sequence,fasta +T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta +T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta From 0962f916e4f523acecb3c7149601adffa74974c7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 17:52:03 +1000 Subject: [PATCH 010/135] fix(proteinfold_test.sh): 
Made path to main.nf rel --- pf_files/proteinfold_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 9d66e6dd..10fc0b14 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -3,7 +3,7 @@ module load nextflow/23.04.4 java/11 export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache -nextflow run /srv/scratch/z5378336/proteinfold/main.nf \ +nextflow run ../main.nf \ --input samplesheet.csv \ --outdir test_out \ --mode alphafold2 \ From 992d6d1c90c46039a105073fb3b65c45502174bc Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 17:54:50 +1000 Subject: [PATCH 011/135] revert(base.config): Changed executor back to local for testing as cluster tooling is incomlete --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index e9b41fad..b7f6a482 100644 --- a/conf/base.config +++ b/conf/base.config @@ -19,7 +19,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - executor = 'pbspro' + //executor = 'pbspro' errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 From 32d466c73c676566352d536ca7523de3544f6d49 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Tue, 30 Jul 2024 18:00:02 +1000 Subject: [PATCH 012/135] fix(proteinfold_test.sh): Changed mode to 'split_msa_production' --- pf_files/proteinfold_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 10fc0b14..d405fed6 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -10,5 +10,6 @@ nextflow run ../main.nf \ --alphafold2_db /data/bio/alphafold \ --full_dbs true \ --alphafold2_model_preset monomer \ + --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ -profile singularity From b3140e735c4202a39a5847d37c160b56dc420a8a Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:09:51 +1000 Subject: [PATCH 013/135] fix(dbs.conf): Updated dbs.conf to work on UNSW infrastructure --- conf/dbs.config | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index c3617f49..4b2d0e4f 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -25,11 +25,13 @@ params { // Alphafold paths bfd_path = "${params.alphafold2_db}/bfd/*" small_bfd_path = "${params.alphafold2_db}/small_bfd/*" - alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + alphafold2_params_path = "${params.alphafold2_db}/params/*" mgnify_path = "${params.alphafold2_db}/mgnify/*" pdb70_path = "${params.alphafold2_db}/pdb70/**" pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**" - uniclust30_path = "${params.alphafold2_db}/uniclust30/**" + //uniclust30_path = "${params.alphafold2_db}/uniclust30/**" + uniclust30_path = "/srv/scratch/sbf/uniclust30/**" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" uniprot_path = "${params.alphafold2_db}/uniprot/*" @@ -40,11 +42,17 
@@ params { mgnify_variable = "${params.alphafold2_db}/mgnify/" pdb70_variable = "${params.alphafold2_db}/pdb70/" pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" - uniclust30_variable = "${params.alphafold2_db}/uniclust30/" + //uniclust30_variable = "${params.alphafold2_db}/uniclust30/" + uniclust30_variable = "/srv/scratch/sbf/uniclust30/" uniref90_variable = "${params.alphafold2_db}/uniref90/" pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" uniprot_variable = "${params.alphafold2_db}/uniprot/" + // Alphafold MSA Variables + mgnify_database_path = "${params.alphafold2_db}/mgnify/" + template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" + obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' uniref30 = 'https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2202.tar.gz' From 4047e62432d461d4e5d36f44cb6532093ddb5cd0 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:10:27 +1000 Subject: [PATCH 014/135] fix(run_alphafold2_msa): Fixed incorrectly named files --- modules/local/run_alphafold2_msa.nf | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 78278810..10d217ca 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -4,6 +4,7 @@ process RUN_ALPHAFOLD2_MSA { tag "$meta.id" label 'process_medium' + debug true container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' : @@ -34,28 +35,30 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? 
"full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=./uniclust30/uniclust30_2018_08/uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " } """ - if [ -f pdb_seqres/pdb_seqres.txt ] - then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt - fi + #if [ -f pdb_seqres/pdb_seqres.txt ] + # \$PDB_SEQRES_TEMP=\$(mktemp --directory) + # cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/ + # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt + #fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2018_12.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ + --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ + --mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \ + 
--template_mmcif_dir=${params.template_mmcif_dir} \ + --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl From 93513bca5d2fcb36dab0dc4a95ecdc293bf180c1 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:11:10 +1000 Subject: [PATCH 015/135] fix(run_alphafold2_pred): Fixed incorrectly named files --- modules/local/run_alphafold2_pred.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 7df9578d..9f6d20a7 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -8,6 +8,7 @@ process RUN_ALPHAFOLD2_PRED { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : 'nfcore/proteinfold_alphafold2_split:1.0.0' }" + echo 'true' input: tuple val(meta), path(fasta) @@ -36,7 +37,8 @@ process RUN_ALPHAFOLD2_PRED { script: def args = task.ext.args ?: '' """ - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + echo \$PWD + #if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ From a007d5a2ff272879048831c4e24f1fc750fc6018 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:11:44 +1000 Subject: [PATCH 016/135] fix(proteinfold_test.sh): Added singulairty argument --- pf_files/proteinfold_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index d405fed6..9e4883ce 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -12,4 +12,4 @@ nextflow run ../main.nf \ --alphafold2_model_preset monomer \ --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ - 
-profile singularity + -profile singularity \ From 232c8c9b1fac0dddf4d862851111f1c39ee0d7c7 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:12:24 +1000 Subject: [PATCH 017/135] fix(samplesheet): Changed sample to a much smaller sample --- pf_files/samplesheet.csv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv index f450a551..10fdfdb9 100644 --- a/pf_files/samplesheet.csv +++ b/pf_files/samplesheet.csv @@ -1,3 +1,2 @@ sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta \ No newline at end of file +1L2Y,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta From 03f257563ce2e14822fee7f0c342cda43185e3bb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 10:12:50 +1000 Subject: [PATCH 018/135] fix(samplesheet): Changed sampel to a smaller sample --- pf_files/test_out/pipeline_info/samplesheet.valid.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pf_files/test_out/pipeline_info/samplesheet.valid.csv b/pf_files/test_out/pipeline_info/samplesheet.valid.csv index 570c3304..b0a380eb 100644 --- a/pf_files/test_out/pipeline_info/samplesheet.valid.csv +++ b/pf_files/test_out/pipeline_info/samplesheet.valid.csv @@ -1,2 +1,2 @@ sequence,fasta -1L2Y_T1,./1L2Y.fasta +1L2Y_T1,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta From 964f5d01c097177cafddab9666770c462fb94ba4 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:04:20 +1000 Subject: [PATCH 019/135] feat(conf/dbs): Added variables for database names, and file names --- conf/dbs.config | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/conf/dbs.config b/conf/dbs.config index 4b2d0e4f..78863ffb 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -21,6 +21,28 @@ 
params { pdb_seqres = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' + + def bfd_name = params.bfd ?: 'bfd' + def smallbfd_name = params.smallbfd ?: 'smallbfd' + def mgnify_name = params.mgnify ?: 'mgnify' + def pdb70_name = params.pdb70 ?: 'pdb70' + def pdb_mmcif_name = params.pdb_mmcif ?: 'pdb_mmcif' + def uniclust30_name = params.uniclust30 ?: 'uniclust30' + def uniref90_name = params.uniref90 ?: 'uniref90' + def pdb_seqres_name = params.pdb_seq ?: 'pdb_seqres' + def uniprot_name = params.uniprot ?: 'uniprot' + def alphafold_params_name = params.alphafold_params ?: 'alphafold_params_*' + def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' + def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' + + def uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' + def bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' + def uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' + def pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' + def bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' + def uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' + def mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' + // Alphafold paths bfd_path = "${params.alphafold2_db}/bfd/*" From 2a79fe43af25a20a77eb3b3505075794eeea5efb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:05:27 +1000 Subject: [PATCH 020/135] feat(conf/dbs): Changed config paths to use database variables instead of hardcoded 
values --- conf/dbs.config | 57 ++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 78863ffb..8f061a7e 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -45,35 +45,48 @@ params { // Alphafold paths - bfd_path = "${params.alphafold2_db}/bfd/*" - small_bfd_path = "${params.alphafold2_db}/small_bfd/*" + bfd_path = "${params.alphafold2_db}/${bfd_name}/*" + small_bfd_path = "${params.alphafold2_db}/${small_bfd_name}/*" //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" - alphafold2_params_path = "${params.alphafold2_db}/params/*" - mgnify_path = "${params.alphafold2_db}/mgnify/*" - pdb70_path = "${params.alphafold2_db}/pdb70/**" - pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/**" + //alphafold2_params_path = "${params.alphafold2_db}/params/*" + alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" + mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" + pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**" + pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**" //uniclust30_path = "${params.alphafold2_db}/uniclust30/**" - uniclust30_path = "/srv/scratch/sbf/uniclust30/**" - uniref90_path = "${params.alphafold2_db}/uniref90/*" - pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" + //uniclust30_path = "/srv/scratch/sbf/uniclust30/**" + uniclust30_path = "${params.alphafold2_db}/${uniclust30_name}/**" + uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*" + pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*" + uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*" // Alphafold variables - bfd_variable = "${params.alphafold2_db}/bfd/" - small_bfd_variable = "${params.alphafold2_db}/smallbfd/" - mgnify_variable = "${params.alphafold2_db}/mgnify/" - pdb70_variable = "${params.alphafold2_db}/pdb70/" - pdb_mmcif_variable = 
"${params.alphafold2_db}/pdb_mmcif/" + //bfd_variable = "${params.alphafold2_db}/bfd/" + bfd_dir_path = "${params.alpahfold2_db}/${bfd_name}/" + //small_bfd_variable = "${params.alphafold2_db}/smallbfd/" + small_bfd_dir_path = "${params.alphafold2_db}/${smallbfd_name}/" + //mgnify_variable = "${params.alphafold2_db}/mgnify/" + mgnify_dir_path = "${params.alphafold2_db}/${mgnify_name}/" + //pdb70_variable = "${params.alphafold2_db}/pdb70/" + pdb70_dir_path = "${params.alphafold2_db}/${pdb70_name}/" + //pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" + pdb_mmcif_dir_path = "${params.alphafold2_db}/${pdb_mmcif_name}/" //uniclust30_variable = "${params.alphafold2_db}/uniclust30/" - uniclust30_variable = "/srv/scratch/sbf/uniclust30/" - uniref90_variable = "${params.alphafold2_db}/uniref90/" - pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" - uniprot_variable = "${params.alphafold2_db}/uniprot/" + //uniclust30_variable = "/srv/scratch/sbf/uniclust30/" + uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/" + //uniref90_variable = "${params.alphafold2_db}/uniref90/" + uniref90_dir_path = "${params.alpahfold2_db}/${uniref90_name}/" + //pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" + pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" + //uniprot_variable = "${params.alphafold2_db}/uniprot/" + uniprot_dir_path = "${params.alphafold2_db}/${uniprot_name}/" // Alphafold MSA Variables - mgnify_database_path = "${params.alphafold2_db}/mgnify/" - template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" - obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + mgnify_database_path = "${params.alphafold2_db}/${mgnify_name}/" + //template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" + template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/" + //obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" + obsolete_pdbs_path = 
"${params.alphafold2_db}/${mmcif_obsolete_name}" // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' From c218ad261b2b1d3d0abadfc84002d5dee3ddb829 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:06:46 +1000 Subject: [PATCH 021/135] feat(run_alphafold2): Changed hardcoded paths to use variables and updated variable names --- modules/local/run_alphafold2.nf | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 731ad1c1..5cf964dd 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -34,28 +34,30 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08/uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}${params.uniprot_fasta_name} " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " } """ - if [ -f ${params.pdb_seqres_variable}pdb_seqres.txt ] - then sed -i "/^\\w*0/d" ${params.pdb_seqres_variable}pdb_seqres.txt + if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + \$PDB_SEQRES_TEMP=\$(mktemp --directory) + cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ + then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} fi - if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + if [ -d ${params.alphafold2_params_path} ]; then ln -r -s ${params.alphafold2_params_path} params/; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ - --template_mmcif_dir=${params.pdb_mmcif_variable}mmcif_files \ - --obsolete_pdbs_path=${params.pdb_mmcif_variable}obsolete.dat \ + --uniref90_database_path=${params.uniref90_dir_path}uniref90.fasta \ + 
--template_mmcif_dir=${params.template_mmcif_dir} \ + --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ --random_seed=53343 \ $args From edff052c835dbd439e16432e0a2ae1ec3b08221d Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:07:42 +1000 Subject: [PATCH 022/135] feat(run_alphafold2_msa): Removed hardcoded paths and changed variables --- modules/local/run_alphafold2_msa.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 10d217ca..2e2bbe9b 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -35,13 +35,13 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_variable}bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=${params.uniclust30_variable}uniclust30_2018_08" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}bfd-first_non_consensus_sequences.fasta" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${uniclust30_db_name}" : + "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_variable}pdb_seqres.txt --uniprot_database_path=${params.uniprot_variable}/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_variable}pdb70 " + alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ #if [ -f pdb_seqres/pdb_seqres.txt ] @@ -55,8 +55,8 @@ process RUN_ALPHAFOLD2_MSA { --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_variable}uniref90.fasta \ - --mgnify_database_path=${params.mgnify_database_path}/mgy_clusters_2022_05.fa \ + --uniref90_database_path=${params.uniref90_dir_path}/${params.uniref90_fasta_name} \ + --mgnify_database_path=${params.mgnify_database_path}/${params.mgy_clusters_fasta_name} \ --template_mmcif_dir=${params.template_mmcif_dir} \ --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ $args From ea4459a713655395514c5db9897554e457d75dfc Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 8 Aug 2024 16:08:19 +1000 Subject: [PATCH 023/135] feat(run_alphafold2_msa): Added code from run_alphafold2.nf so that the script does not attempt to modify the existing database file --- modules/local/run_alphafold2_msa.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 
2e2bbe9b..ac3bd143 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -44,10 +44,10 @@ process RUN_ALPHAFOLD2_MSA { alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ - #if [ -f pdb_seqres/pdb_seqres.txt ] + #if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] # \$PDB_SEQRES_TEMP=\$(mktemp --directory) - # cp ${params.pdb_seqres_variable}pdb_seqres.txt \${PDB_SEQRES_TEMP}/ - # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/pdb_seqres.txt + # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ + # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} #fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ From 83ca302ab2222552c07dd5c834c3c5c86808ccb3 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:28:30 +1000 Subject: [PATCH 024/135] fix(conf/dbs): Changed variable names to have _prefix on the end to avoid conflicts with existing link variables --- conf/dbs.config | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 8f061a7e..03c06b37 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -22,16 +22,16 @@ params { uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - def bfd_name = params.bfd ?: 'bfd' - def smallbfd_name = params.smallbfd ?: 'smallbfd' - def mgnify_name = params.mgnify ?: 'mgnify' - def pdb70_name = params.pdb70 ?: 'pdb70' - def pdb_mmcif_name = params.pdb_mmcif ?: 'pdb_mmcif' - def uniclust30_name = params.uniclust30 ?: 'uniclust30' - def uniref90_name = params.uniref90 ?: 'uniref90' - def pdb_seqres_name = params.pdb_seq ?: 'pdb_seqres' - def uniprot_name = params.uniprot ?: 'uniprot' - def 
alphafold_params_name = params.alphafold_params ?: 'alphafold_params_*' + def bfd_name = params.bfd_prefix ?: 'bfd' + def smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' + def mgnify_name = params.mgnify_prefix ?: 'mgnify' + def pdb70_name = params.pdb70_prefix ?: 'pdb70' + def pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' + def uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' + def uniref90_name = params.uniref90_prefix ?: 'uniref90' + def pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' + def uniprot_name = params.uniprot_prefix ?: 'uniprot' + def alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' From faba0abfd77997cfc168a5203db2b98b0abe4df9 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:29:18 +1000 Subject: [PATCH 025/135] fix(conf/dbs): Changed existing variables to use new prefix variables --- conf/dbs.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 03c06b37..257b7d18 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -46,7 +46,7 @@ params { // Alphafold paths bfd_path = "${params.alphafold2_db}/${bfd_name}/*" - small_bfd_path = "${params.alphafold2_db}/${small_bfd_name}/*" + small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" //alphafold2_params_path = "${params.alphafold2_db}/params/*" alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" @@ -62,7 +62,7 @@ params { // Alphafold variables //bfd_variable = "${params.alphafold2_db}/bfd/" - bfd_dir_path = "${params.alpahfold2_db}/${bfd_name}/" + bfd_dir_path = "${params.alphafold2_db}/${bfd_name}/" //small_bfd_variable = "${params.alphafold2_db}/smallbfd/" small_bfd_dir_path = 
"${params.alphafold2_db}/${smallbfd_name}/" //mgnify_variable = "${params.alphafold2_db}/mgnify/" @@ -75,7 +75,7 @@ params { //uniclust30_variable = "/srv/scratch/sbf/uniclust30/" uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/" //uniref90_variable = "${params.alphafold2_db}/uniref90/" - uniref90_dir_path = "${params.alpahfold2_db}/${uniref90_name}/" + uniref90_dir_path = "${params.alphafold2_db}/${uniref90_name}/" //pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" //uniprot_variable = "${params.alphafold2_db}/uniprot/" From 04cad9dcd24256d3e5866857049826b7096540b8 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 09:34:32 +1000 Subject: [PATCH 026/135] feat(nextflow.config): Added new param variables and defaults to the config file --- nextflow.config | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/nextflow.config b/nextflow.config index ec71afb5..691ee289 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,48 @@ params { full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null + + // Database prefixes + bfd_prefix = null + smallbfd_prefix = null + mgnify_prefix = null + pdb70_prefix = null + pdb_mmcif_prefix = null + uniclust30_prefix = null + uniref90_prefix = null + pdb_seq_prefix = null + uniprot_prefix = null + alphafold_params_prefix = null + mmcif_path = null + mmcif_obsolete = null + uniclust30_db = null + bfd_first_non_consensus_sequences = null + uniprot_fasta = null + pdb_seqres_txt = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null + uniref90_fasta = null + mgy_clusters_fasta = null + uniclust30_prefix = null + + bfd_name = null + smallbfd_name = null + mgnify_name = null + pdb70_name = null + pdb_mmcif_name = null + uniclust30_name = null + 
uniref90_name = null + pdb_seqres_name = null + uniprot_name = null + alphafold_params_name = null + mmcif_files_name = null + mmcif_obsolete_name = null + uniclust30_db_name = null + bfd_first_non_consensus_sequences_name = null + uniprot_fasta_name = null + pdb_seqres_txt_name = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null + uniref90_fasta_name = null + mgy_clusters_fasta_name = null // Alphafold2 links bfd = null From afeb1226dd023a6c199976aacd94694c6a033957 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 9 Aug 2024 12:38:11 +1000 Subject: [PATCH 027/135] feat(dbs): Made variables global --- conf/dbs.config | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 257b7d18..5c34c852 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -22,26 +22,26 @@ params { uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - def bfd_name = params.bfd_prefix ?: 'bfd' - def smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' - def mgnify_name = params.mgnify_prefix ?: 'mgnify' - def pdb70_name = params.pdb70_prefix ?: 'pdb70' - def pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' - def uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' - def uniref90_name = params.uniref90_prefix ?: 'uniref90' - def pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' - def uniprot_name = params.uniprot_prefix ?: 'uniprot' - def alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' - def mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' - def mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' + bfd_name = params.bfd_prefix ?: 'bfd' + smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' + mgnify_name = 
params.mgnify_prefix ?: 'mgnify' + pdb70_name = params.pdb70_prefix ?: 'pdb70' + pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' + uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' + uniref90_name = params.uniref90_prefix ?: 'uniref90' + pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' + uniprot_name = params.uniprot_prefix ?: 'uniprot' + alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' + mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' + mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' - def uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' - def bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' - def uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' - def pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' - def bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' - def uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' - def mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' + uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' + bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' + uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' + pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' + uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' + mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' // Alphafold paths From 
3d615b75f76cb6d2bc8dcc991e0378d47f748e79 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:19:28 +1000 Subject: [PATCH 028/135] fix(dbs): Changed database directory default --- conf/dbs.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/dbs.config b/conf/dbs.config index 5c34c852..7d06e41a 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -31,7 +31,7 @@ params { uniref90_name = params.uniref90_prefix ?: 'uniref90' pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' uniprot_name = params.uniprot_prefix ?: 'uniprot' - alphafold_params_name = params.alphafold_params_prefix ?: 'alphafold_params_*' + alphafold_params_name = params.alphafold_params_prefix ?: 'params/alphafold_params_*' mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' From cb292568f827613e116316a30893741e1e02b36e Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:20:37 +1000 Subject: [PATCH 029/135] feat(katana): Temporarily removed PBS job scheduling --- conf/katana.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/katana.config b/conf/katana.config index a344ffc0..67f310a7 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -19,7 +19,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - executor = 'pbspro' + //executor = 'pbspro' errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 From 5829301a5cc29d5b2d5b09b8b53e4ca05c975de6 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:22:29 +1000 Subject: [PATCH 030/135] fix(run_alphafold2): Fixed copy command to point to the correct directory --- modules/local/run_alphafold2.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 5cf964dd..30163279 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -43,7 +43,7 @@ process RUN_ALPHAFOLD2 { alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " } """ - if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] \$PDB_SEQRES_TEMP=\$(mktemp --directory) cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} From 485e400f5198b94bff46f0a90b784d4f1fbddf7b Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:23:33 +1000 Subject: [PATCH 031/135] fix(run_alphafold2): Updated paths to point to the correct uniclust databse --- modules/local/run_alphafold2_msa.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index ac3bd143..89667a73 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -35,7 +35,7 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${uniclust30_db_name}" : + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " From d008f38feec0e74c8704f252c84883059eb53437 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:26:32 +1000 Subject: [PATCH 032/135] fix(run_alphafold2): Fixed typo --- modules/local/run_alphafold2_msa.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 89667a73..d4fa4f53 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -44,7 +44,7 @@ process RUN_ALPHAFOLD2_MSA { alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " } """ - #if [ -f ${params.pdb_seqres_dir_path}/${params.pbd_seqres_txt_name} ] + #if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] # \$PDB_SEQRES_TEMP=\$(mktemp --directory) # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} From a0dbd9c7ac3aaa8dc78a0af61aa5ab755d7e82ff Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:37:28 +1000 Subject: [PATCH 033/135] feat(run_alphafold2): Added symlink for params file --- modules/local/run_alphafold2_pred.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 9f6d20a7..cd7c93eb 100644 --- 
a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -38,7 +38,9 @@ process RUN_ALPHAFOLD2_PRED { def args = task.ext.args ?: '' """ echo \$PWD - #if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi + if [ -d ${params.alphafold2_db}/${params.alphafold2_params_path} ]; + then ln -r -s params/alphafold_params_*/* params/ + fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ From 598cc269a40c52b7c1316aadffa7e6d34150a0eb Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:42:26 +1000 Subject: [PATCH 034/135] feat(nextflow): Changed default to use GPU --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 691ee289..8612912c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // Input options input = null mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = false + use_gpu = true // Alphafold2 parameters alphafold2_mode = "standard" From eba441254ab202113940a48da7e5300d0ded1e69 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:44:15 +1000 Subject: [PATCH 035/135] feat(nextflow): Included katana config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 8612912c..775eb22a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -147,7 +147,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/base.config' +includeConfig 'conf/katana.config' // Load nf-core custom profiles from different Institutions try { From 4811d3cc235625b08a9930174332aef0840b44b1 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:44:54 +1000 Subject: [PATCH 036/135] feat(test): Added options to katana tests --- pf_files/proteinfold_test.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git 
a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 9e4883ce..3bdbc024 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -1,15 +1,16 @@ module load nextflow/23.04.4 java/11 -export SINGULARITY_CACHE_DIR=/srv/scratch/sbf/singularity_cache -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cache +export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache +export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache nextflow run ../main.nf \ --input samplesheet.csv \ --outdir test_out \ --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ + --alphafold2_db /mnt/af2/ \ --full_dbs true \ - --alphafold2_model_preset monomer \ + --alphafold2_model_preset multimer \ + --alphafold_params_name 'params' \ --alphafold2_mode 'split_msa_prediction' \ --use_gpu true \ -profile singularity \ From 92121114426ba67855a3c2245fa4d73647c4db59 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 14:48:56 +1000 Subject: [PATCH 037/135] revert(nextflow): Changed default GPU to false --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 775eb22a..c1a353e1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,7 @@ params { // Input options input = null mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = true + use_gpu = false // Alphafold2 parameters alphafold2_mode = "standard" From 07954691cefc137e7f19ee271b3920c415b9ca84 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Fri, 16 Aug 2024 15:31:39 +1000 Subject: [PATCH 038/135] revert(nextflow): Changed config back to base config --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c1a353e1..691ee289 100644 --- a/nextflow.config +++ b/nextflow.config @@ -147,7 +147,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/katana.config' +includeConfig 'conf/base.config' // 
Load nf-core custom profiles from different Institutions try { From 3ade215a1b4ed7a9f99db260ecedd34516a1bce3 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 23 Aug 2024 11:53:37 +1000 Subject: [PATCH 039/135] modified: conf/dbs.config --- conf/dbs.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conf/dbs.config b/conf/dbs.config index c3617f49..074ec714 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -57,4 +57,12 @@ params { "AlphaFold2-multimer-v2" : "alphafold_params_colab_2022-03-02", "AlphaFold2-ptm" : "alphafold_params_2021-07-14" ] + + // RoseTTAFold links + uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' + pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' + + // RoseTTAFold paths + uniref30_variable = "${params.rosettafold_db}/uniref30/" + pdb100_variable = "${params.rosettafold_db}/pdb100/" } From 5fe8e1df99ff1df65575a5d0ae757f3f19a2e19b Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 5 Sep 2024 16:02:24 +1000 Subject: [PATCH 040/135] feat(katana): Added katana config --- modules/local/run_alphafold2_pred.nf | 8 ++++---- nextflow.config | 4 +++- pf_files/proteinfold_test.sh | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index cd7c93eb..9b371f86 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -9,7 +9,6 @@ process RUN_ALPHAFOLD2_PRED { 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : 'nfcore/proteinfold_alphafold2_split:1.0.0' }" echo 'true' - input: tuple val(meta), path(fasta) val db_preset @@ -38,9 +37,10 @@ process RUN_ALPHAFOLD2_PRED { def args = task.ext.args ?: '' """ echo \$PWD - if [ -d ${params.alphafold2_db}/${params.alphafold2_params_path} ]; - then ln -r -s params/alphafold_params_*/* params/ - fi + #if [ -d ${params.alphafold2_params_path} ]; + #then + ln -r -f -s ${params.alphafold2_params_path}* 
params/ + #fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ diff --git a/nextflow.config b/nextflow.config index 691ee289..69cc7ad2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -146,8 +146,10 @@ params { } +spack.enabled = true + // Load base.config by default for all pipelines -includeConfig 'conf/base.config' +includeConfig 'conf/katana.config' // Load nf-core custom profiles from different Institutions try { diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh index 3bdbc024..396a63f1 100755 --- a/pf_files/proteinfold_test.sh +++ b/pf_files/proteinfold_test.sh @@ -1,4 +1,4 @@ -module load nextflow/23.04.4 java/11 +module load nextflow/23.04.4 java/11 cuda/11.8.0 export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache From 9af55b19269bca2fb9de8565ef1b71cac7d2a279 Mon Sep 17 00:00:00 2001 From: nbtm-sh Date: Thu, 10 Oct 2024 12:24:32 +1100 Subject: [PATCH 041/135] feat(style): pushing uncommited changes --- modules/local/run_alphafold2.nf | 40 +++++++++++++++++----------- modules/local/run_alphafold2_msa.nf | 39 ++++++++++++++------------- modules/local/run_alphafold2_pred.nf | 23 ++++++++-------- 3 files changed, 57 insertions(+), 45 deletions(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 30163279..013f3ed1 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -5,9 +5,12 @@ process RUN_ALPHAFOLD2 { tag "$meta.id" label 'process_medium' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://nfcore/proteinfold_alphafold2_standard:1.0.0' : - 'nfcore/proteinfold_alphafold2_standard:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2 module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_standard:1.1.1" input: tuple val(meta), path(fasta) @@ -19,7 +22,7 @@ process RUN_ALPHAFOLD2 { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -34,31 +37,35 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" + def db_preset = db_preset ? 
"full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}${params.uniprot_fasta_name} " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb_dir_path}${params.pdb70_name} " + alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] - \$PDB_SEQRES_TEMP=\$(mktemp --directory) - cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} \${PDB_SEQRES_TEMP}/ - then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} + RUNTIME_TMP=\$(mktemp -d) + nvcc --version 2>&1 | tee /home/z3545907/nvcc.txt + nvidia-smi 2>&1 | tee /home/z3545907/nvidia-smi.txt + if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] + cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} + then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt fi - if [ -d ${params.alphafold2_params_path} ]; then ln -r -s ${params.alphafold2_params_path} params/; fi + if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - 
--uniref90_database_path=${params.uniref90_dir_path}uniref90.fasta \ - --template_mmcif_dir=${params.template_mmcif_dir} \ - --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ + --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ + --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ + --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ --random_seed=53343 \ + --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb @@ -71,6 +78,7 @@ process RUN_ALPHAFOLD2 { echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv cd .. + rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index d4fa4f53..7878d9d1 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -4,11 +4,13 @@ process RUN_ALPHAFOLD2_MSA { tag "$meta.id" label 'process_medium' - debug true - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_msa:1.0.0' : - 'nfcore/proteinfold_alphafold2_msa:1.0.0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_msa:1.1.1" input: tuple val(meta), path(fasta) @@ -20,7 +22,7 @@ process RUN_ALPHAFOLD2_MSA { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -35,33 +37,34 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' - def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.bfd_dir_path}${params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name} --uniclust30_database_path=${params.uniclust30_dir_path}${params.uniclust30_db_name}" : - "reduced_dbs --small_bfd_database_path=${params.small_bfd_path}${params.bfd_first_non_consensus_sequences_name}" + def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : + "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} --uniprot_database_path=${params.uniprot_dir_path}/${params.uniprot_fasta_name} " + alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.pdb70_dir_path}${params.pdb70_name} " + alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - #if [ -f ${params.pdb_seqres_dir_path}/${params.pdb_seqres_txt_name} ] - # \$PDB_SEQRES_TEMP=\$(mktemp --directory) - # cp ${params.pdb_seqres_dir_path}${params.pdb_seqres_txt_name} 
\${PDB_SEQRES_TEMP}/ - # then sed -i "/^\\w*0/d" \$PDB_SEQERS_TEMP/${params.pdb_seqres_txt_name} - #fi + RUNTIME_TMP=\$(mktemp -d) + if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] + cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} + then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt + fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --uniref90_database_path=${params.uniref90_dir_path}/${params.uniref90_fasta_name} \ - --mgnify_database_path=${params.mgnify_database_path}/${params.mgy_clusters_fasta_name} \ - --template_mmcif_dir=${params.template_mmcif_dir} \ - --obsolete_pdbs_path=${params.obsolete_pdbs_path} \ + --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ + --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ + --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ + --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl + rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 9b371f86..3f34c95f 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -3,12 +3,16 @@ */ process RUN_ALPHAFOLD2_PRED { tag "$meta.id" - label 'process_medium', 'gpu_compute' + label 'process_medium' + label 'gpu_compute' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. 
Please use Docker / Singularity / Podman instead.") + } + + container "nf-core/proteinfold_alphafold2_split:1.1.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://nfcore/proteinfold_alphafold2_split:1.0.0' : - 'nfcore/proteinfold_alphafold2_split:1.0.0' }" - echo 'true' input: tuple val(meta), path(fasta) val db_preset @@ -19,7 +23,7 @@ process RUN_ALPHAFOLD2_PRED { path ('mgnify/*') path ('pdb70/*') path ('pdb_mmcif/*') - path ('uniclust30/*') + path ('uniref30/*') path ('uniref90/*') path ('pdb_seqres/*') path ('uniprot/*') @@ -36,11 +40,7 @@ process RUN_ALPHAFOLD2_PRED { script: def args = task.ext.args ?: '' """ - echo \$PWD - #if [ -d ${params.alphafold2_params_path} ]; - #then - ln -r -f -s ${params.alphafold2_params_path}* params/ - #fi + if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ @@ -48,6 +48,7 @@ process RUN_ALPHAFOLD2_PRED { --data_dir=\$PWD \ --random_seed=53343 \ --msa_path=${msa} \ + --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb ./"${fasta.baseName}".alphafold.pdb From 3a13ab763b4d9ba9add54b319044e821c03212b6 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 11 Oct 2024 15:54:04 +1100 Subject: [PATCH 042/135] deleted: null/pipeline_info/ as per https://github.com/Australian-Structural-Biology-Computing/proteinfold/issues/11 --- null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt | 1 - null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt | 1 - null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt | 1 - null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt | 1 - 4 files changed, 4 deletions(-) delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt delete mode 100644 
null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt b/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt b/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt b/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt b/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar From f8b8f4ee65426319d39ea71ee58d2200cd0503bc Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 11 Oct 2024 16:45:28 +1100 Subject: [PATCH 043/135] Draft new file: run_helixfold3.nf --- modules/local/run_helixfold3.nf | 100 ++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 modules/local/run_helixfold3.nf diff --git 
a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf new file mode 100644 index 00000000..58d72cd2 --- /dev/null +++ b/modules/local/run_helixfold3.nf @@ -0,0 +1,100 @@ +/* + * Run HelixFold3 + */ +process RUN_HELIXFOLD3 { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_HELIXFOLD3 module does not support Conda. Please use Docker / Singularity / Podman / Apptainer instead.") + } + + container "helixfold3.sif" + + input: + tuple val(meta), path(fasta) + val db_preset + path ('params/*') + path ('bfd/*') + path ('small_bfd/*') + path ('mgnify/*') + path ('rfam/*') + path ('pdb_mmcif/*') + path ('uniclust30/*') + path ('uniref90/*') + path ('pdb_seqres/*') + path ('uniprot/*') + path ('ccd/*') + + output: + path ("${fasta.baseName}*") + path "*_mqc.tsv", emit: multiqc + path "versions.yaml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + export PATH="/opt/miniforge/envs/helixfold/bin:$PATH" + export PATH="$MAXIT_SRC/bin:$PATH" + export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" + export RCSBROOT=$MAXIT_SRC + + CUDA_VISIBLE_DEVICES=0 /opt/miniforge/envs/helixfold/bin/python3.9 inference.py \ + --maxit_binary "${MAXIT_SRC}/bin/maxit" \ + --jackhmmer_binary_path "/opt/miniforge/envs/helixfold/bin/jackhmmer" \ + --hhblits_binary_path "/opt/miniforge/envs/helixfold/bin/hhblits" \ + --hhsearch_binary_path "/opt/miniforge/envs/helixfold/bin/hhsearch" \ + --kalign_binary_path "/opt/miniforge/envs/helixfold/bin/kalign" \ + --hmmsearch_binary_path "/opt/miniforge/envs/helixfold/bin/hmmsearch" \ + --hmmbuild_binary_path "/opt/miniforge/envs/helixfold/bin/hmmbuild" \ + --preset='${db_preset}' \ + --bfd_database_path="${params.alphafold2_db}bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ + 
--small_bfd_database_path="${params.alphafold2_db}/g/bfd-first_non_consensus_sequences.fasta" \ + --uniclust30_database_path="${params.alphafold2_db}/g/uniclust30/uniclust30_2018_08" \ + --uniprot_database_path="${params.alphafold2_db}uniprot/uniprot.fasta" \ + --pdb_seqres_database_path="${params.alphafold2_db}pdb_seqres/pdb_seqres.txt" \ + --rfam_database_path="${params.alphafold2_db}/g/Rfam-14.9_rep_seq.fasta" \ + --template_mmcif_dir="${params.alphafold2_db}pdb_mmcif/mmcif_files" \ + --obsolete_pdbs_path="${params.alphafold2_db}pdb_mmcif/obsolete.dat" \ + --ccd_preprocessed_path="${params.alphafold2_db}/g/ccd_preprocessed_etkdg.pkl.gz" \ + --max_template_date=2024-08-14 \ + --input_mnt="$fasta" \ + --output_dir="\$PWD" \ + --model_name allatom_demo \ + --init_model init_models/HelixFold3-240814.pdparams \ + --infer_times 5 \ + --precision "bf16" + + cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.pdb ./"${fasta.baseName}".helixfold.pdb + cd "${fasta.baseName}" + awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank1/predicted_structure.pdb | uniq > ranked_0_plddt.tsv + for i in 1 2 3 4 + do awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank\$i/predicted_structure.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv + done + paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv + echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv + cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + cd .. 
+ cp ${fasta.baseName}* ./ + + cat <<-END_VERSIONS > versions.yaml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ./"${fasta.baseName}".alphafold.pdb + touch ./"${fasta.baseName}"_mqc.tsv + + cat <<-END_VERSIONS > versions.yaml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} From c993475045f3a14b554c253c41605c9e1d13f66b Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 14 Oct 2024 15:27:29 +1100 Subject: [PATCH 044/135] Initial draft rosettafold-all-atom.nf --- modules/local/run_rosettafold-all-atom.nf | 62 + pf_files/1L2Y.fasta | 3 - pf_files/proteinfold_run.sh | 9 - pf_files/proteinfold_run_k095.pbs | 16 - pf_files/proteinfold_test.pbs | 15 - pf_files/proteinfold_test.sh | 16 - pf_files/proteinfold_test_cpu.sh | 9 - pf_files/proteinfold_test_k095.pbs | 16 - pf_files/proteinfold_testfile.pbs | 14 - pf_files/proteinfold_testfile.sh | 9 - pf_files/samplesheet.csv | 2 - pf_files/samplesheetold.csv | 3 - .../execution_report_2024-07-29_15-13-50.html | 1041 ---------------- .../execution_report_2024-07-29_15-18-14.html | 1041 ---------------- .../execution_report_2024-07-29_16-33-39.html | 1082 ----------------- .../execution_report_2024-07-29_17-07-25.html | 1082 ----------------- ...xecution_timeline_2024-07-29_15-13-50.html | 222 ---- ...xecution_timeline_2024-07-29_15-18-14.html | 223 ---- ...xecution_timeline_2024-07-29_16-33-39.html | 223 ---- ...xecution_timeline_2024-07-29_17-07-25.html | 223 ---- .../execution_trace_2024-07-29_15-13-50.txt | 1 - .../execution_trace_2024-07-29_15-18-14.txt | 3 - .../execution_trace_2024-07-29_16-27-42.txt | 2 - .../execution_trace_2024-07-29_16-33-39.txt | 3 - .../execution_trace_2024-07-29_17-07-25.txt | 3 - .../execution_trace_2024-07-29_17-32-53.txt | 1 - .../execution_trace_2024-07-29_17-36-09.txt | 1 - .../pipeline_dag_2024-07-29_15-13-50.html | 243 ---- 
.../pipeline_dag_2024-07-29_15-18-14.html | 243 ---- .../pipeline_info/samplesheet.valid.csv | 2 - 30 files changed, 62 insertions(+), 5751 deletions(-) create mode 100644 modules/local/run_rosettafold-all-atom.nf delete mode 100644 pf_files/1L2Y.fasta delete mode 100644 pf_files/proteinfold_run.sh delete mode 100644 pf_files/proteinfold_run_k095.pbs delete mode 100644 pf_files/proteinfold_test.pbs delete mode 100755 pf_files/proteinfold_test.sh delete mode 100644 pf_files/proteinfold_test_cpu.sh delete mode 100644 pf_files/proteinfold_test_k095.pbs delete mode 100644 pf_files/proteinfold_testfile.pbs delete mode 100644 pf_files/proteinfold_testfile.sh delete mode 100644 pf_files/samplesheet.csv delete mode 100644 pf_files/samplesheetold.csv delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt delete mode 100644 
pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt delete mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/samplesheet.valid.csv diff --git a/modules/local/run_rosettafold-all-atom.nf b/modules/local/run_rosettafold-all-atom.nf new file mode 100644 index 00000000..86df8cd0 --- /dev/null +++ b/modules/local/run_rosettafold-all-atom.nf @@ -0,0 +1,62 @@ +/* + * Run RoseTTAFold-All-Atom + */ +process RUN_ROSETTAFOLD-ALL-ATOM { + tag "$meta.id" + label 'process_medium' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error("Local RUN_ROSETTAFOLD-ALL-ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") + } + + container "RoseTTAFold-All-Atom.sif" + + input: + tuple val(meta), path(file) + + output: + path ("${file.baseName}*") + path "*_mqc.tsv", emit: multiqc + path "versions.yaml", emit: versions + + when: + task.ext.when == null || task.ext.when + +### Need to modify the DB variables to match dbs.config + script: + apptainer run --nv -B /mnt/af2,/srv \ + --env blast_path="${params.blast_path}" \ + --env bfd_path="${params.bfd_path}" \ + --env uniref30_path="${params.uniref30_path}" \ + --env pdb100="${params.pdb100}" \ + RoseTTAFold-All-Atom-dev.sif "$file" + } +# cp "${file.baseName}"/ranked_0.pdb ./"${file.baseName}".rosettafold-all-atom.pdb +# cd "${file.baseName}" +# awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv +# for i in 1 2 3 4 +# do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv +# done +# paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv 
ranked_4_plddt.tsv > plddt.tsv +# echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv +# cat header.tsv plddt.tsv > ../"${file.baseName}"_plddt_mqc.tsv +# cd .. + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + "" + touch ./"${file.baseName}".rosettafold-all-atom.pdb + touch ./"${file.baseName}"_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + END_VERSIONS + """ +} diff --git a/pf_files/1L2Y.fasta b/pf_files/1L2Y.fasta deleted file mode 100644 index 9558f5e0..00000000 --- a/pf_files/1L2Y.fasta +++ /dev/null @@ -1,3 +0,0 @@ ->1L2Y_1|Chain A|TC5b|null -NLYIQWLKDGGPSSGRPPPS - diff --git a/pf_files/proteinfold_run.sh b/pf_files/proteinfold_run.sh deleted file mode 100644 index e1391d7f..00000000 --- a/pf_files/proteinfold_run.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu false \ - -profile singularity diff --git a/pf_files/proteinfold_run_k095.pbs b/pf_files/proteinfold_run_k095.pbs deleted file mode 100644 index 0eccb7b2..00000000 --- a/pf_files/proteinfold_run_k095.pbs +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#PBS -l host=k095 -#PBS -l select=1:ncpus=8:mem=40gb -#PBS -l ngpus=1 -#PBS -l walltime=0:30:00 - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_run.sh diff --git a/pf_files/proteinfold_test.pbs b/pf_files/proteinfold_test.pbs deleted file mode 100644 index 47cba76b..00000000 --- a/pf_files/proteinfold_test.pbs +++ /dev/null @@ -1,15 
+0,0 @@ -#!/bin/bash - -#PBS -l select=1:ncpus=2:mem=6gb -#PBS -l walltime=6:00:00 -#PBS -j oe - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_test.sh diff --git a/pf_files/proteinfold_test.sh b/pf_files/proteinfold_test.sh deleted file mode 100755 index 396a63f1..00000000 --- a/pf_files/proteinfold_test.sh +++ /dev/null @@ -1,16 +0,0 @@ -module load nextflow/23.04.4 java/11 cuda/11.8.0 - -export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache - -nextflow run ../main.nf \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /mnt/af2/ \ - --full_dbs true \ - --alphafold2_model_preset multimer \ - --alphafold_params_name 'params' \ - --alphafold2_mode 'split_msa_prediction' \ - --use_gpu true \ - -profile singularity \ diff --git a/pf_files/proteinfold_test_cpu.sh b/pf_files/proteinfold_test_cpu.sh deleted file mode 100644 index c2bf2251..00000000 --- a/pf_files/proteinfold_test_cpu.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu false \ - -profile test diff --git a/pf_files/proteinfold_test_k095.pbs b/pf_files/proteinfold_test_k095.pbs deleted file mode 100644 index 627827f4..00000000 --- a/pf_files/proteinfold_test_k095.pbs +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#PBS -l host=k095 -#PBS -l select=1:ncpus=8:mem=20gb -#PBS -l ngpus=1 -#PBS -l walltime=0:30:00 - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd 
$PBS_O_WORKDIR - -bash proteinfold_test.sh diff --git a/pf_files/proteinfold_testfile.pbs b/pf_files/proteinfold_testfile.pbs deleted file mode 100644 index d7013c16..00000000 --- a/pf_files/proteinfold_testfile.pbs +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -#PBS -l select=1:ncpus=2:mem=6gb -#PBS -l walltime=6:00:00 - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_testfile.sh diff --git a/pf_files/proteinfold_testfile.sh b/pf_files/proteinfold_testfile.sh deleted file mode 100644 index 1d0879f2..00000000 --- a/pf_files/proteinfold_testfile.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu true \ - -profile test diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv deleted file mode 100644 index 10fdfdb9..00000000 --- a/pf_files/samplesheet.csv +++ /dev/null @@ -1,2 +0,0 @@ -sequence,fasta -1L2Y,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta diff --git a/pf_files/samplesheetold.csv b/pf_files/samplesheetold.csv deleted file mode 100644 index 467fdcf0..00000000 --- a/pf_files/samplesheetold.csv +++ /dev/null @@ -1,3 +0,0 @@ -sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html deleted file mode 100644 index ebddc9f8..00000000 --- 
a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html +++ /dev/null @@ -1,1041 +0,0 @@ - - - - - - - - - - - [elegant_rutherford] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[elegant_rutherford]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: null.

-

The full error message was:

-
SIGINT
-
- - -
-
Run times
-
- 29-Jul-2024 15:13:51 - 29-Jul-2024 15:14:17 - (duration: 25.9s) -
- -
-
-
  0 succeeded  
-
  0 cached  
-
  0 ignored  
-
  0 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
3501f224-9f68-4d2a-a50f-2206dd92e59a
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html deleted file mode 100644 index 27d7bf18..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html +++ /dev/null @@ -1,1041 +0,0 @@ - - - - - - - - - - - [sad_lagrange] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[sad_lagrange]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: null.

-

The full error message was:

-
SIGINT
-
- - -
-
Run times
-
- 29-Jul-2024 15:18:15 - 29-Jul-2024 17:05:50 - (duration: 1h 47m 35s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  0 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
2ca0a1d9-b74f-4246-a767-8ec2191b7ef1
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html deleted file mode 100644 index 6d917080..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html +++ /dev/null @@ -1,1082 +0,0 @@ - - - - - - - - - - - [amazing_solvay] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[amazing_solvay]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: 1.

-

The full error message was:

-
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
-
-Caused by:
-  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
-
-Command executed:
-
-  if [ -f nullpdb_seqres.txt ]
-      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres//pdb_seqres.txt
-  fi
-  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
-  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70_from_mmcif_200916/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/mmcif_files         --obsolete_pdbs_path=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
-  
-  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
-  cd "1L2Y.1"
-  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
-  for i in 1 2 3 4
-      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
-  done
-  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
-  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
-  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
-  cd ..
-  
-  cat <<-END_VERSIONS > versions.yml
-  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
-      python: $(python3 --version | sed 's/Python //g')
-  END_VERSIONS
-
-Command exit status:
-  1
-
-Command output:
-  (empty)
-
-Command error:
-  .command.run: line 299: ${params.bfd_path}1: bad substitution
-
-Work dir:
-  /srv/scratch/z5378336/proteinfold/pf_files/work/bf/c7dbf3dcf0faf201228be86b11efc6
-
-Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh`
-
- - -
-
Run times
-
- 29-Jul-2024 16:33:41 - 29-Jul-2024 17:02:10 - (duration: 28m 29s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  1 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/z5378336/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
b21e2d4c-1b8a-4340-adcd-c171b2ca9a9d
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html deleted file mode 100644 index 1c68a93d..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html +++ /dev/null @@ -1,1082 +0,0 @@ - - - - - - - - - - - [silly_austin] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[silly_austin]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: 1.

-

The full error message was:

-
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
-
-Caused by:
-  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
-
-Command executed:
-
-  if [ -f /data/bio/alphafold/pdb_seqres/pdb_seqres.txt ]
-      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres/pdb_seqres.txt
-  fi
-  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
-  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=/data/bio/alphafold/pdb_mmcif/mmcif_files         --obsolete_pdbs_path=/data/bio/alphafold/pdb_mmcif/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
-  
-  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
-  cd "1L2Y.1"
-  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
-  for i in 1 2 3 4
-      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
-  done
-  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
-  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
-  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
-  cd ..
-  
-  cat <<-END_VERSIONS > versions.yml
-  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
-      python: $(python3 --version | sed 's/Python //g')
-  END_VERSIONS
-
-Command exit status:
-  1
-
-Command output:
-  (empty)
-
-Command error:
-  .command.run: line 299: ${params.bfd_path}1: bad substitution
-
-Work dir:
-  /srv/scratch/z5378336/proteinfold/pf_files/work/eb/81fd9dfc117fc74a65cc890353460c
-
-Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
-
- - -
-
Run times
-
- 29-Jul-2024 17:07:27 - 29-Jul-2024 17:26:59 - (duration: 19m 32s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  1 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/z5378336/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
4288de28-063c-4b0a-82c8-c6d0df5a3c69
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html deleted file mode 100644 index c87bc8eb..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html +++ /dev/null @@ -1,222 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html deleted file mode 100644 index 5e552e09..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html deleted file mode 100644 index 1375ff51..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html deleted file mode 100644 index fb094096..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt deleted file mode 100644 index 4a4ef576..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 1d/d00ae9 1861667 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 15:18:29.085 2.9s 1s 18.0% 2.9 MB 22.1 MB 1.4 MB 641 B -2 79/773162 1862273 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (T1024_T1) ABORTED - 2024-07-29 15:30:06.452 - - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt deleted file mode 100644 index 82e28f17..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt +++ /dev/null @@ -1,2 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 e2/28e478 1443250 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:29:24.063 8.7s 1s 6.1% 15.2 MB 38.1 MB 1.4 MB 455 B diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt deleted file mode 100644 index 2b541591..00000000 --- 
a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 40/92b88f 1444153 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:35:11.644 7.7s 1s 8.8% 4.3 MB 29.8 MB 1.4 MB 444 B -2 bf/c7dbf3 1445514 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:02:06.143 3.1s 2.6s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt deleted file mode 100644 index 2a993552..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 96/58a04d 1445890 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 17:09:01.045 6.6s 1s 13.3% 3.3 MB 26.8 MB 1.4 MB 445 B -2 eb/81fd9d 1446739 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:26:53.354 3.9s 3.2s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name 
status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html deleted file mode 100644 index 4e8d1e4b..00000000 --- a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - Nextflow Cytoscape.js with Dagre - - - - - - - - - - - -

Nextflow Cytoscape.js with Dagre

-
- - - diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html deleted file mode 100644 index 4e8d1e4b..00000000 --- a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - Nextflow Cytoscape.js with Dagre - - - - - - - - - - - -

Nextflow Cytoscape.js with Dagre

-
- - - diff --git a/pf_files/test_out/pipeline_info/samplesheet.valid.csv b/pf_files/test_out/pipeline_info/samplesheet.valid.csv deleted file mode 100644 index b0a380eb..00000000 --- a/pf_files/test_out/pipeline_info/samplesheet.valid.csv +++ /dev/null @@ -1,2 +0,0 @@ -sequence,fasta -1L2Y_T1,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta From 4b3bd9eb9556069f70382fc12266dba4c1c6a4ed Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 14 Oct 2024 15:41:08 +1100 Subject: [PATCH 045/135] Cleaned up folders --- .../execution_trace_2024-07-29_14-54-51.txt | 1 - .../execution_trace_2024-07-29_14-56-42.txt | 1 - .../execution_trace_2024-07-29_15-01-40.txt | 1 - .../execution_trace_2024-07-29_15-03-42.txt | 1 - pf_files/1L2Y.fasta | 3 - pf_files/proteinfold_run.sh | 9 - pf_files/proteinfold_run_k095.pbs | 16 - pf_files/proteinfold_test.pbs | 15 - pf_files/proteinfold_test.sh | 16 - pf_files/proteinfold_test_cpu.sh | 9 - pf_files/proteinfold_test_k095.pbs | 16 - pf_files/proteinfold_testfile.pbs | 14 - pf_files/proteinfold_testfile.sh | 9 - pf_files/samplesheet.csv | 2 - pf_files/samplesheetold.csv | 3 - .../execution_report_2024-07-29_15-13-50.html | 1041 ---------------- .../execution_report_2024-07-29_15-18-14.html | 1041 ---------------- .../execution_report_2024-07-29_16-33-39.html | 1082 ----------------- .../execution_report_2024-07-29_17-07-25.html | 1082 ----------------- ...xecution_timeline_2024-07-29_15-13-50.html | 222 ---- ...xecution_timeline_2024-07-29_15-18-14.html | 223 ---- ...xecution_timeline_2024-07-29_16-33-39.html | 223 ---- ...xecution_timeline_2024-07-29_17-07-25.html | 223 ---- .../execution_trace_2024-07-29_15-13-50.txt | 1 - .../execution_trace_2024-07-29_15-18-14.txt | 3 - .../execution_trace_2024-07-29_16-27-42.txt | 2 - .../execution_trace_2024-07-29_16-33-39.txt | 3 - .../execution_trace_2024-07-29_17-07-25.txt | 3 - .../execution_trace_2024-07-29_17-32-53.txt | 1 - .../execution_trace_2024-07-29_17-36-09.txt | 1 - 
.../pipeline_dag_2024-07-29_15-13-50.html | 243 ---- .../pipeline_dag_2024-07-29_15-18-14.html | 243 ---- .../pipeline_info/samplesheet.valid.csv | 2 - 33 files changed, 5755 deletions(-) delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt delete mode 100644 null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt delete mode 100644 pf_files/1L2Y.fasta delete mode 100644 pf_files/proteinfold_run.sh delete mode 100644 pf_files/proteinfold_run_k095.pbs delete mode 100644 pf_files/proteinfold_test.pbs delete mode 100755 pf_files/proteinfold_test.sh delete mode 100644 pf_files/proteinfold_test_cpu.sh delete mode 100644 pf_files/proteinfold_test_k095.pbs delete mode 100644 pf_files/proteinfold_testfile.pbs delete mode 100644 pf_files/proteinfold_testfile.sh delete mode 100644 pf_files/samplesheet.csv delete mode 100644 pf_files/samplesheetold.csv delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html delete mode 100644 pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html delete mode 100644 pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt delete mode 100644 
pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt delete mode 100644 pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt delete mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html delete mode 100644 pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html delete mode 100644 pf_files/test_out/pipeline_info/samplesheet.valid.csv diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt b/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_14-54-51.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt b/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_14-56-42.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt b/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt deleted file mode 100644 index 6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_15-01-40.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt b/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt deleted file mode 100644 index 
6b739acd..00000000 --- a/null/pipeline_info/execution_trace_2024-07-29_15-03-42.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/1L2Y.fasta b/pf_files/1L2Y.fasta deleted file mode 100644 index 9558f5e0..00000000 --- a/pf_files/1L2Y.fasta +++ /dev/null @@ -1,3 +0,0 @@ ->1L2Y_1|Chain A|TC5b|null -NLYIQWLKDGGPSSGRPPPS - diff --git a/pf_files/proteinfold_run.sh b/pf_files/proteinfold_run.sh deleted file mode 100644 index e1391d7f..00000000 --- a/pf_files/proteinfold_run.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu false \ - -profile singularity diff --git a/pf_files/proteinfold_run_k095.pbs b/pf_files/proteinfold_run_k095.pbs deleted file mode 100644 index 0eccb7b2..00000000 --- a/pf_files/proteinfold_run_k095.pbs +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#PBS -l host=k095 -#PBS -l select=1:ncpus=8:mem=40gb -#PBS -l ngpus=1 -#PBS -l walltime=0:30:00 - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_run.sh diff --git a/pf_files/proteinfold_test.pbs b/pf_files/proteinfold_test.pbs deleted file mode 100644 index 47cba76b..00000000 --- a/pf_files/proteinfold_test.pbs +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -#PBS -l select=1:ncpus=2:mem=6gb -#PBS -l walltime=6:00:00 -#PBS -j oe - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_test.sh diff --git a/pf_files/proteinfold_test.sh 
b/pf_files/proteinfold_test.sh deleted file mode 100755 index 396a63f1..00000000 --- a/pf_files/proteinfold_test.sh +++ /dev/null @@ -1,16 +0,0 @@ -module load nextflow/23.04.4 java/11 cuda/11.8.0 - -export SINGULARITY_CACHE_DIR=/srv/scratch/$USER/Singularity/cache -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/$USER/Singularity/cache - -nextflow run ../main.nf \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /mnt/af2/ \ - --full_dbs true \ - --alphafold2_model_preset multimer \ - --alphafold_params_name 'params' \ - --alphafold2_mode 'split_msa_prediction' \ - --use_gpu true \ - -profile singularity \ diff --git a/pf_files/proteinfold_test_cpu.sh b/pf_files/proteinfold_test_cpu.sh deleted file mode 100644 index c2bf2251..00000000 --- a/pf_files/proteinfold_test_cpu.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu false \ - -profile test diff --git a/pf_files/proteinfold_test_k095.pbs b/pf_files/proteinfold_test_k095.pbs deleted file mode 100644 index 627827f4..00000000 --- a/pf_files/proteinfold_test_k095.pbs +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -#PBS -l host=k095 -#PBS -l select=1:ncpus=8:mem=20gb -#PBS -l ngpus=1 -#PBS -l walltime=0:30:00 - -export SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_test.sh diff --git a/pf_files/proteinfold_testfile.pbs b/pf_files/proteinfold_testfile.pbs deleted file mode 100644 index d7013c16..00000000 --- a/pf_files/proteinfold_testfile.pbs +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -#PBS -l select=1:ncpus=2:mem=6gb -#PBS -l walltime=6:00:00 - -export 
SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir -export NXF_SINGULARITY_CACHEDIR=/srv/scratch/sbf/singularity_cachedir - -module load nextflow/23.04.4 -module load java/11.0.17_8-openjdk - -cd $PBS_O_WORKDIR - -bash proteinfold_testfile.sh diff --git a/pf_files/proteinfold_testfile.sh b/pf_files/proteinfold_testfile.sh deleted file mode 100644 index 1d0879f2..00000000 --- a/pf_files/proteinfold_testfile.sh +++ /dev/null @@ -1,9 +0,0 @@ -nextflow run nf-core/proteinfold -r 1.1.0 \ - --input samplesheet.csv \ - --outdir test_out \ - --mode alphafold2 \ - --alphafold2_db /data/bio/alphafold \ - --full_dbs true \ - --alphafold2_model_preset monomer \ - --use_gpu true \ - -profile test diff --git a/pf_files/samplesheet.csv b/pf_files/samplesheet.csv deleted file mode 100644 index 10fdfdb9..00000000 --- a/pf_files/samplesheet.csv +++ /dev/null @@ -1,2 +0,0 @@ -sequence,fasta -1L2Y,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta diff --git a/pf_files/samplesheetold.csv b/pf_files/samplesheetold.csv deleted file mode 100644 index 467fdcf0..00000000 --- a/pf_files/samplesheetold.csv +++ /dev/null @@ -1,3 +0,0 @@ -sequence,fasta -T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta -T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html deleted file mode 100644 index ebddc9f8..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-13-50.html +++ /dev/null @@ -1,1041 +0,0 @@ - - - - - - - - - - - [elegant_rutherford] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[elegant_rutherford]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: null.

-

The full error message was:

-
SIGINT
-
- - -
-
Run times
-
- 29-Jul-2024 15:13:51 - 29-Jul-2024 15:14:17 - (duration: 25.9s) -
- -
-
-
  0 succeeded  
-
  0 cached  
-
  0 ignored  
-
  0 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
3501f224-9f68-4d2a-a50f-2206dd92e59a
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html deleted file mode 100644 index 27d7bf18..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_15-18-14.html +++ /dev/null @@ -1,1041 +0,0 @@ - - - - - - - - - - - [sad_lagrange] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[sad_lagrange]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: null.

-

The full error message was:

-
SIGINT
-
- - -
-
Run times
-
- 29-Jul-2024 15:18:15 - 29-Jul-2024 17:05:50 - (duration: 1h 47m 35s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  0 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
2ca0a1d9-b74f-4246-a767-8ec2191b7ef1
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html deleted file mode 100644 index 6d917080..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_16-33-39.html +++ /dev/null @@ -1,1082 +0,0 @@ - - - - - - - - - - - [amazing_solvay] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[amazing_solvay]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: 1.

-

The full error message was:

-
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
-
-Caused by:
-  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
-
-Command executed:
-
-  if [ -f nullpdb_seqres.txt ]
-      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres//pdb_seqres.txt
-  fi
-  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
-  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70_from_mmcif_200916/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/mmcif_files         --obsolete_pdbs_path=rsync.rcsb.org::ftp_data/structures/divided/mmCIF/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
-  
-  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
-  cd "1L2Y.1"
-  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
-  for i in 1 2 3 4
-      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
-  done
-  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
-  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
-  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
-  cd ..
-  
-  cat <<-END_VERSIONS > versions.yml
-  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
-      python: $(python3 --version | sed 's/Python //g')
-  END_VERSIONS
-
-Command exit status:
-  1
-
-Command output:
-  (empty)
-
-Command error:
-  .command.run: line 299: ${params.bfd_path}1: bad substitution
-
-Work dir:
-  /srv/scratch/z5378336/proteinfold/pf_files/work/bf/c7dbf3dcf0faf201228be86b11efc6
-
-Tip: you can try to figure out what's wrong by changing to the process work dir and showing the script file named `.command.sh`
-
- - -
-
Run times
-
- 29-Jul-2024 16:33:41 - 29-Jul-2024 17:02:10 - (duration: 28m 29s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  1 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/z5378336/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
b21e2d4c-1b8a-4340-adcd-c171b2ca9a9d
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html deleted file mode 100644 index 1c68a93d..00000000 --- a/pf_files/test_out/pipeline_info/execution_report_2024-07-29_17-07-25.html +++ /dev/null @@ -1,1082 +0,0 @@ - - - - - - - - - - - [silly_austin] Nextflow Workflow Report - - - - - - - -
-
- -

Nextflow workflow report

-

[silly_austin]

- - -
-

Workflow execution completed unsuccessfully!

-

The exit status of the task that caused the workflow execution to fail was: 1.

-

The full error message was:

-
Error executing process > 'NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)'
-
-Caused by:
-  Process `NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1)` terminated with an error exit status (1)
-
-Command executed:
-
-  if [ -f /data/bio/alphafold/pdb_seqres/pdb_seqres.txt ]
-      then sed -i "/^\w*0/d" /data/bio/alphafold/pdb_seqres/pdb_seqres.txt
-  fi
-  if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi
-  python3 /app/alphafold/run_alphafold.py         --fasta_paths=1L2Y.1.fasta         --model_preset=monomer --pdb70_database_path=/data/bio/alphafold/pdb70/pdb70          --db_preset=full_dbs --bfd_database_path=/data/bio/alphafold/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniclust30_database_path=/data/bio/alphafold/uniclust30/uniclust30_2018_08/uniclust30_2018_08         --output_dir=$PWD         --data_dir=$PWD         --uniref90_database_path=/data/bio/alphafold/uniref90/uniref90.fasta         --template_mmcif_dir=/data/bio/alphafold/pdb_mmcif/mmcif_files         --obsolete_pdbs_path=/data/bio/alphafold/pdb_mmcif/obsolete.dat         --random_seed=53343         --use_gpu_relax=true --max_template_date 2020-05-14
-  
-  cp "1L2Y.1"/ranked_0.pdb ./"1L2Y.1".alphafold.pdb
-  cd "1L2Y.1"
-  awk '{print $6"\t"$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv
-  for i in 1 2 3 4
-      do awk '{print $6"\t"$11}' ranked_$i.pdb | uniq | awk '{print $2}' > ranked_"$i"_plddt.tsv
-  done
-  paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv
-  echo -e Positions"\t"rank_0"\t"rank_1"\t"rank_2"\t"rank_3"\t"rank_4 > header.tsv
-  cat header.tsv plddt.tsv > ../"1L2Y.1"_plddt_mqc.tsv
-  cd ..
-  
-  cat <<-END_VERSIONS > versions.yml
-  "NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2":
-      python: $(python3 --version | sed 's/Python //g')
-  END_VERSIONS
-
-Command exit status:
-  1
-
-Command output:
-  (empty)
-
-Command error:
-  .command.run: line 299: ${params.bfd_path}1: bad substitution
-
-Work dir:
-  /srv/scratch/z5378336/proteinfold/pf_files/work/eb/81fd9dfc117fc74a65cc890353460c
-
-Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
-
- - -
-
Run times
-
- 29-Jul-2024 17:07:27 - 29-Jul-2024 17:26:59 - (duration: 19m 32s) -
- -
-
-
  1 succeeded  
-
  0 cached  
-
  0 ignored  
-
  1 failed  
-
-
- -
Nextflow command
-
nextflow run /srv/scratch/z5378336/proteinfold/main.nf --input samplesheet.csv --outdir test_out --mode alphafold2 --alphafold2_db /data/bio/alphafold --full_dbs true --alphafold2_model_preset monomer --use_gpu true -profile singularity
-
- -
-
CPU-Hours
-
(a few seconds)
- -
Launch directory
-
/srv/scratch/z5378336/proteinfold/pf_files
- -
Work directory
-
/srv/scratch/z5378336/proteinfold/pf_files/work
- -
Project directory
-
/srv/scratch/z5378336/proteinfold
- - -
Script name
-
main.nf
- - - -
Script ID
-
9c5b06fd002e694ba4b6c4766cd2546f
- - -
Workflow session
-
4288de28-063c-4b0a-82c8-c6d0df5a3c69
- - - -
Workflow profile
-
singularity
- - - -
Nextflow version
-
version 23.04.4, build 5881 (25-09-2023 15:34 UTC)
-
-
-
- -
-

Resource Usage

-

These plots give an overview of the distribution of resource usage for each process.

- -

CPU

- -
-
-
-
-
-
-
- -
- -

Memory

- -
-
-
-
-
-
-
-
-
-
-
- -

Job Duration

- -
-
-
-
-
-
-
-
- -

I/O

- -
-
-
-
-
-
-
-
-
- -
-
-

Tasks

-

This table shows information about each task in the workflow. Use the search box on the right - to filter rows for specific values. Clicking headers will sort the table by that value and - scrolling side to side will reveal more columns.

-
- - -
-
-
-
-
- -
- (tasks table omitted because the dataset is too big) -
-
- -
-
- Generated by Nextflow, version 23.04.4 -
-
- - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html deleted file mode 100644 index c87bc8eb..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-13-50.html +++ /dev/null @@ -1,222 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html deleted file mode 100644 index 5e552e09..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_15-18-14.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html deleted file mode 100644 index 1375ff51..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_16-33-39.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html b/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html deleted file mode 100644 index fb094096..00000000 --- a/pf_files/test_out/pipeline_info/execution_timeline_2024-07-29_17-07-25.html +++ /dev/null @@ -1,223 +0,0 @@ - - - - - - - - - - - - -
-

Processes execution timeline

-

- Launch time:
- Elapsed time:
- Legend: job wall time / memory usage (RAM) -

-
-
- - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-13-50.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt deleted file mode 100644 index 4a4ef576..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_15-18-14.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 1d/d00ae9 1861667 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 15:18:29.085 2.9s 1s 18.0% 2.9 MB 22.1 MB 1.4 MB 641 B -2 79/773162 1862273 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (T1024_T1) ABORTED - 2024-07-29 15:30:06.452 - - - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt deleted file mode 100644 index 82e28f17..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-27-42.txt +++ /dev/null @@ -1,2 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 e2/28e478 1443250 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:29:24.063 8.7s 1s 6.1% 15.2 MB 38.1 MB 1.4 MB 455 B diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt deleted file mode 100644 index 2b541591..00000000 --- 
a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_16-33-39.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 40/92b88f 1444153 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 16:35:11.644 7.7s 1s 8.8% 4.3 MB 29.8 MB 1.4 MB 444 B -2 bf/c7dbf3 1445514 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:02:06.143 3.1s 2.6s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt deleted file mode 100644 index 2a993552..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-07-25.txt +++ /dev/null @@ -1,3 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar -1 96/58a04d 1445890 NFCORE_PROTEINFOLD:ALPHAFOLD2:INPUT_CHECK:SAMPLESHEET_CHECK (samplesheet.csv) COMPLETED 0 2024-07-29 17:09:01.045 6.6s 1s 13.3% 3.3 MB 26.8 MB 1.4 MB 445 B -2 eb/81fd9d 1446739 NFCORE_PROTEINFOLD:ALPHAFOLD2:RUN_ALPHAFOLD2 (1L2Y_T1) FAILED 1 2024-07-29 17:26:53.354 3.9s 3.2s - - - - - diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-32-53.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt b/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt deleted file mode 100644 index 6b739acd..00000000 --- a/pf_files/test_out/pipeline_info/execution_trace_2024-07-29_17-36-09.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name 
status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html deleted file mode 100644 index 4e8d1e4b..00000000 --- a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-13-50.html +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - Nextflow Cytoscape.js with Dagre - - - - - - - - - - - -

Nextflow Cytoscape.js with Dagre

-
- - - diff --git a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html b/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html deleted file mode 100644 index 4e8d1e4b..00000000 --- a/pf_files/test_out/pipeline_info/pipeline_dag_2024-07-29_15-18-14.html +++ /dev/null @@ -1,243 +0,0 @@ - - - - - - Nextflow Cytoscape.js with Dagre - - - - - - - - - - - -

Nextflow Cytoscape.js with Dagre

-
- - - diff --git a/pf_files/test_out/pipeline_info/samplesheet.valid.csv b/pf_files/test_out/pipeline_info/samplesheet.valid.csv deleted file mode 100644 index b0a380eb..00000000 --- a/pf_files/test_out/pipeline_info/samplesheet.valid.csv +++ /dev/null @@ -1,2 +0,0 @@ -sequence,fasta -1L2Y_T1,/srv/scratch/sbf/nextflow_pipelines-dev/proteinfold/pf_files/1L2Y.fasta From 65897c896a1b754eda158fe4333cde0665fad781 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 16 Oct 2024 12:13:09 +1100 Subject: [PATCH 046/135] added workflows/rosettafold-all-atom.nf first draft --- workflows/rosettafold-all-atom.nf | 202 ++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 workflows/rosettafold-all-atom.nf diff --git a/workflows/rosettafold-all-atom.nf b/workflows/rosettafold-all-atom.nf new file mode 100644 index 00000000..87464bee --- /dev/null +++ b/workflows/rosettafold-all-atom.nf @@ -0,0 +1,202 @@ +## Currently just based on the AF2 .nf workflow, requires modification. + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowRosettafold-All-Atom.initialise(params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.input, + params.rosettafold-all-atom_db +] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input file not specified!' 
} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { INPUT_CHECK } from '../subworkflows/local/rfaa_input_check' ## Doesn't exist, RFAA takes different inputs than AF2 +include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from '../subworkflows/local/prepare_rosettafold-all-atom_dbs' ## Doesn't exist + +// +// MODULE: Local to the pipeline +// +include { RUN_ROSETTAFOLD-ALL-ATOM } from '../modules/local/run_rosettafold-all-atom' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow ROSETTAFOLD-ALL-ATOM { + + ch_versions = Channel.empty() + + // + // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // + if (params.rosettafold-all-atom_model_preset != 'multimer') { + INPUT_CHECK ( + ch_input + ) + .fastas + .map { + meta, fasta -> + [ meta, fasta.splitFasta(file:true) ] + } + .transpose() + .set { ch_fasta } + } else { + INPUT_CHECK ( + ch_input + ) + .fastas + .set { ch_fasta } + } + ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + + // + // SUBWORKFLOW: Download databases and params for Rosettafold-All-Atom + // + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS ( ) + ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.versions) + + if (params.rosettafold-all-atom_mode == 'standard') { + // + // SUBWORKFLOW: Run Rosettafold-All-Atom standard mode + // + RUN_ROSETTAFOLD-ALL-ATOM ( + ch_fasta, + params.full_dbs, + params.rosettafold-all-atom_model_preset, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.params, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.bfd.ifEmpty([]), + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.small_bfd.ifEmpty([]), + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.mgnify, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb70, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_mmcif, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniclust30, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniref90, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_seqres, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniprot, + ) + ch_versions = ch_versions.mix(RUN_ROSETTAFOLD-ALL-ATOM.out.versions) + ch_multiqc_rep = RUN_ROSETTAFOLD-ALL-ATOM.out.multiqc.collect() + } else if (params.rosettafold-all-atom_mode == 'split_msa_prediction') { + // + // SUBWORKFLOW: Run Rosettafold-All-Atom split mode, MSA and 
prediction + // + RUN_ROSETTAFOLD-ALL-ATOM_MSA ( + ch_fasta, + params.full_dbs, + params.rosettafold-all-atom_model_preset, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.params, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.bfd.ifEmpty([]), + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.small_bfd.ifEmpty([]), + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.mgnify, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb70, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_mmcif, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniclust30, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniref90, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_seqres, + PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniprot + + ) + ch_versions = ch_versions.mix(RUN_ROSETTAFOLD-ALL-ATOM_MSA.out.versions) + + } + + // + // MODULE: Pipeline reporting + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + // + // MODULE: MultiQC + // + workflow_summary = WorkflowRosettafold-All-Atom.paramsSummaryMultiqc(workflow, summary_params) + ch_workflow_summary = Channel.value(workflow_summary) + + methods_description = WorkflowRosettafold-All-Atom.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + multiqc_report = MULTIQC.out.report.toList() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + } + NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ From f362ae96390dbff1478279ffeedcdad2e3e8f023 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 16 Oct 2024 15:58:27 +1100 Subject: [PATCH 047/135] Updating main.nf to current master version and adding RFAA lines --- main.nf | 226 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 193 insertions(+), 33 deletions(-) diff --git a/main.nf b/main.nf index eebc1469..c10a303f 100644 --- a/main.nf +++ b/main.nf @@ -1,10 +1,9 @@ #!/usr/bin/env nextflow /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/proteinfold -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/proteinfold - Website: https://nf-co.re/proteinfold Slack : https://nfcore.slack.com/channels/proteinfold ---------------------------------------------------------------------------------------- @@ -13,67 +12,228 @@ nextflow.enable.dsl = 2 /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COLABFOLD PARAMETER VALUES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.colabfold_alphafold2_params = WorkflowMain.getColabfoldAlphafold2Params(params) -params.colabfold_alphafold2_params_path = WorkflowMain.getColabfoldAlphafold2ParamsPath(params) +if (params.mode == "alphafold2") { + include { PREPARE_ALPHAFOLD2_DBS } from './subworkflows/local/prepare_alphafold2_dbs' + include { ALPHAFOLD2 } from './workflows/alphafold2' +} else if (params.mode == "colabfold") { + include { PREPARE_COLABFOLD_DBS } from './subworkflows/local/prepare_colabfold_dbs' + include { COLABFOLD } from './workflows/colabfold' +} else if (params.mode == "esmfold") { + include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' + include { ESMFOLD } from './workflows/esmfold' +} else if (params.mode == "rosettafold-all-atom") { + include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from './subworkflows/local/prepare_rosettafold-all-atom_dbs' ## To be created + include { ROSETTAFOLD-ALL-ATOM } from './workflows/rosettafold-all-atom' +} + +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2Params } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' +include { getColabfoldAlphafold2ParamsPath } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COLABFOLD PARAMETER VALUES 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +params.colabfold_alphafold2_params_link = getColabfoldAlphafold2Params() +params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath() /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -if (params.mode == "alphafold2") { - include { ALPHAFOLD2 } from './workflows/alphafold2' -} else if (params.mode == "colabfold") { - include { COLABFOLD } from './workflows/colabfold' -} - +// +// WORKFLOW: Run main analysis pipeline +// workflow NFCORE_PROTEINFOLD { + + main: + ch_multiqc = Channel.empty() + ch_versions = Channel.empty() + // // WORKFLOW: Run alphafold2 // if(params.mode == "alphafold2") { - ALPHAFOLD2 () + // + // SUBWORKFLOW: Prepare Alphafold2 DBs + // + PREPARE_ALPHAFOLD2_DBS ( + params.alphafold2_db, + params.full_dbs, + params.bfd_path, + params.small_bfd_path, + params.alphafold2_params_path, + params.mgnify_path, + params.pdb70_path, + params.pdb_mmcif_path, + params.uniref30_alphafold2_path, + params.uniref90_path, + params.pdb_seqres_path, + params.uniprot_path, + params.bfd_link, + params.small_bfd_link, + params.alphafold2_params_link, + params.mgnify_link, + params.pdb70_link, + params.pdb_mmcif_link, + params.pdb_obsolete_link, + params.uniref30_alphafold2_link, + params.uniref90_link, + params.pdb_seqres_link, + params.uniprot_sprot_link, + params.uniprot_trembl_link + ) + ch_versions = ch_versions.mix(PREPARE_ALPHAFOLD2_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/alphafold2 workflow + // + ALPHAFOLD2 ( + ch_versions, + 
params.full_dbs, + params.alphafold2_mode, + params.alphafold2_model_preset, + PREPARE_ALPHAFOLD2_DBS.out.params, + PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]).first(), + PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]).first(), + PREPARE_ALPHAFOLD2_DBS.out.mgnify, + PREPARE_ALPHAFOLD2_DBS.out.pdb70, + PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, + PREPARE_ALPHAFOLD2_DBS.out.uniref30, + PREPARE_ALPHAFOLD2_DBS.out.uniref90, + PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, + PREPARE_ALPHAFOLD2_DBS.out.uniprot + ) + ch_multiqc = ALPHAFOLD2.out.multiqc_report + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) } // // WORKFLOW: Run colabfold // - // else if(params.mode == "colabfold_webserver" || params.mode == "colabfold_local") { else if(params.mode == "colabfold") { - COLABFOLD () + // + // SUBWORKFLOW: Prepare Colabfold DBs + // + PREPARE_COLABFOLD_DBS ( + params.colabfold_db, + params.colabfold_server, + params.colabfold_alphafold2_params_path, + params.colabfold_db_path, + params.uniref30_colabfold_path, + params.colabfold_alphafold2_params_link, + params.colabfold_db_link, + params.uniref30_colabfold_link, + params.create_colabfold_index + ) + ch_versions = ch_versions.mix(PREPARE_COLABFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/colabfold workflow + // + COLABFOLD ( + ch_versions, + params.colabfold_model_preset, + PREPARE_COLABFOLD_DBS.out.params, + PREPARE_COLABFOLD_DBS.out.colabfold_db, + PREPARE_COLABFOLD_DBS.out.uniref30, + params.num_recycles_colabfold + ) + ch_multiqc = COLABFOLD.out.multiqc_report + ch_versions = ch_versions.mix(COLABFOLD.out.versions) } + + // + // WORKFLOW: Run rosettafold-all-atom + // + if(params.mode == "rosettafold-all-atom") { + ROSETTAFOLD-ALL-ATOM () + } + + // + // WORKFLOW: Run esmfold + // + else if(params.mode == "esmfold") { + // + // SUBWORKFLOW: Prepare esmfold DBs + // + PREPARE_ESMFOLD_DBS ( + params.esmfold_db, + params.esmfold_params_path, + params.esmfold_3B_v1, + params.esm2_t36_3B_UR50D, + 
params.esm2_t36_3B_UR50D_contact_regression + ) + ch_versions = ch_versions.mix(PREPARE_ESMFOLD_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/esmfold workflow + // + ESMFOLD ( + ch_versions, + PREPARE_ESMFOLD_DBS.out.params, + params.num_recycles_esmfold + ) + ch_multiqc = ESMFOLD.out.multiqc_report + ch_versions = ch_versions.mix(ESMFOLD.out.versions) + } + emit: + multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [version1, version2, ...] } /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir + ) + + // + // WORKFLOW: Run main workflow + // NFCORE_PROTEINFOLD () + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_PROTEINFOLD.out.multiqc_report + ) } /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ From 22998305e013b5856870323505ae6f0ac60f8d0c Mon Sep 17 
00:00:00 2001 From: jscgh Date: Fri, 18 Oct 2024 17:29:03 +1100 Subject: [PATCH 048/135] Imported subworkflows and fixed formatting errors with RFAA lines --- conf/dbs.config | 13 +- conf/modules_rosettafold-all-atom.config | 15 + main.nf | 49 +- modules/local/run_rosettafold-all-atom.nf | 4 +- nextflow.config | 197 ++++++-- nextflow_schema.json | 259 ++++++++-- subworkflows/local/aria2_uncompress.nf | 9 +- subworkflows/local/prepare_alphafold2_dbs.nf | 108 +++-- subworkflows/local/prepare_colabfold_dbs.nf | 34 +- subworkflows/local/prepare_esmfold_dbs.nf | 64 +++ .../utils_nfcore_proteinfold_pipeline/main.nf | 206 ++++++++ .../nf-core/utils_nextflow_pipeline/main.nf | 126 +++++ .../nf-core/utils_nextflow_pipeline/meta.yml | 38 ++ .../tests/main.function.nf.test | 54 +++ .../tests/main.function.nf.test.snap | 20 + .../tests/main.workflow.nf.test | 111 +++++ .../tests/nextflow.config | 9 + .../utils_nextflow_pipeline/tests/tags.yml | 2 + .../nf-core/utils_nfcore_pipeline/main.nf | 446 ++++++++++++++++++ .../nf-core/utils_nfcore_pipeline/meta.yml | 24 + .../tests/main.function.nf.test | 134 ++++++ .../tests/main.function.nf.test.snap | 166 +++++++ .../tests/main.workflow.nf.test | 29 ++ .../tests/main.workflow.nf.test.snap | 19 + .../tests/nextflow.config | 9 + .../utils_nfcore_pipeline/tests/tags.yml | 2 + .../nf-core/utils_nfvalidation_plugin/main.nf | 62 +++ .../utils_nfvalidation_plugin/meta.yml | 44 ++ .../tests/main.nf.test | 200 ++++++++ .../tests/nextflow_schema.json | 96 ++++ .../utils_nfvalidation_plugin/tests/tags.yml | 2 + workflows/alphafold2.nf | 265 +++++------ ...ld-all-atom.nf => rosettafold_all_atom.nf} | 78 +-- 33 files changed, 2505 insertions(+), 389 deletions(-) create mode 100644 conf/modules_rosettafold-all-atom.config create mode 100644 subworkflows/local/prepare_esmfold_dbs.nf create mode 100644 subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/main.nf create mode 
100644 subworkflows/nf-core/utils_nextflow_pipeline/meta.yml create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config create mode 100644 subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/main.nf create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/meta.yml create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config create mode 100644 subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/main.nf create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json create mode 100644 subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml rename workflows/{rosettafold-all-atom.nf => rosettafold_all_atom.nf} (63%) diff --git a/conf/dbs.config b/conf/dbs.config index fc81c13b..0166f40d 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -100,12 +100,15 @@ params { "AlphaFold2-multimer-v2" : "alphafold_params_colab_2022-03-02", "AlphaFold2-ptm" : "alphafold_params_2021-07-14" ] - + // RoseTTAFold links - uniref30 = 
'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' - pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' + uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' + pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' + RFAA_paper_weights = 'http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt' // RoseTTAFold paths - uniref30_variable = "${params.rosettafold_db}/uniref30/" - pdb100_variable = "${params.rosettafold_db}/pdb100/" + uniref30_variable = "${params.rosettafold_all_atom_db}/uniref30/" + pdb100_variable = "${params.rosettafold_all_atom_db}/pdb100/" + bfd_variable = "${params.rosettafold_all_atom_db}/bfd/" + RFAA_paper_weights_variable = "" } diff --git a/conf/modules_rosettafold-all-atom.config b/conf/modules_rosettafold-all-atom.config new file mode 100644 index 00000000..0e871755 --- /dev/null +++ b/conf/modules_rosettafold-all-atom.config @@ -0,0 +1,15 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +// +// General configuration options +// diff --git a/main.nf b/main.nf index c10a303f..683b3218 100644 --- a/main.nf +++ b/main.nf @@ -26,9 +26,9 @@ if (params.mode == "alphafold2") { } else if (params.mode == "esmfold") { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' -} else if (params.mode == "rosettafold-all-atom") { - include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from './subworkflows/local/prepare_rosettafold-all-atom_dbs' ## To be created - include { ROSETTAFOLD-ALL-ATOM } from './workflows/rosettafold-all-atom' +} else if (params.mode == "rosettafold_all_atom") { + include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' + include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold' } include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -153,13 +153,6 @@ workflow NFCORE_PROTEINFOLD { ch_versions = ch_versions.mix(COLABFOLD.out.versions) } - // - // WORKFLOW: Run rosettafold-all-atom - // - if(params.mode == "rosettafold-all-atom") { - ROSETTAFOLD-ALL-ATOM () - } - // // WORKFLOW: Run esmfold // @@ -187,6 +180,42 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc = ESMFOLD.out.multiqc_report ch_versions = ch_versions.mix(ESMFOLD.out.versions) } + + // + // WORKFLOW: Run rosettafold_all_atom + // + else if(params.mode == "rosettafold_all_atom") { + // + // SUBWORKFLOW: Prepare Rosttafold-all-atom DBs + // + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( + params.bfd_path, + params.uniref30_rosettafold_all_atom_path, + params.pdb100_path, + params.blast_path, + params.RFAA_paper_weights_path, + params.bfd_link, + params.uniref30_rosettafold_all_atom_link, + params.pdb100_link, + params.blast_link, + params.RFAA_paper_weights_link + ) + ch_versions = 
ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/rosettafold_all_atom workflow + // + ROSETTAFOLD_ALL_ATOM ( + ch_versions, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30_rosettafold_all_atom, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.RFAA_paper_weights + ) + ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report + ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) + } emit: multiqc_report = ch_multiqc // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [version1, version2, ...] diff --git a/modules/local/run_rosettafold-all-atom.nf b/modules/local/run_rosettafold-all-atom.nf index 86df8cd0..ac9ca4be 100644 --- a/modules/local/run_rosettafold-all-atom.nf +++ b/modules/local/run_rosettafold-all-atom.nf @@ -32,7 +32,7 @@ process RUN_ROSETTAFOLD-ALL-ATOM { --env pdb100="${params.pdb100}" \ RoseTTAFold-All-Atom-dev.sif "$file" } -# cp "${file.baseName}"/ranked_0.pdb ./"${file.baseName}".rosettafold-all-atom.pdb +# cp "${file.baseName}"/ranked_0.pdb ./"${file.baseName}".rosettafold_all_atom.pdb # cd "${file.baseName}" # awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv # for i in 1 2 3 4 @@ -51,7 +51,7 @@ process RUN_ROSETTAFOLD-ALL-ATOM { stub: "" - touch ./"${file.baseName}".rosettafold-all-atom.pdb + touch ./"${file.baseName}".rosettafold_all_atom.pdb touch ./"${file.baseName}"_mqc.tsv cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 69cc7ad2..f91e93a0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,13 +11,13 @@ params { // Input options input = null - mode = 'alphafold2' // {alphafold2, colabfold} + mode = 'alphafold2' // {alphafold2, colabfold, esmfold, rosettafold_all_atom} use_gpu = false // Alphafold2 parameters alphafold2_mode = "standard" max_template_date = 
"2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs + full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null @@ -84,15 +84,15 @@ params { mgnify_path = null pdb70_path = null pdb_mmcif_path = null - uniclust30_path = null + uniref30_alphafold2_path = null uniref90_path = null pdb_seqres_path = null uniprot_path = null // Colabfold parameters colabfold_server = "webserver" - colabfold_model_preset = "AlphaFold2-ptm" // {AlphaFold2-ptm,AlphaFold2-multimer-v1,AlphaFold2-multimer-v2} - num_recycle = 3 + colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} + num_recycles_colabfold = 3 use_amber = true colabfold_db = null db_load_mode = 0 @@ -102,11 +102,34 @@ params { // Colabfold links colabfold_db_link = null - uniref30 = null + uniref30_colabfold_link = null // Colabfold paths colabfold_db_path = null - uniref30_path = null + uniref30_colabfold_path = null + + // Esmfold parameters + esmfold_db = null + esmfold_model_preset = "monomer" + num_recycles_esmfold = 4 + + // Esmfold links + esmfold_3B_v1 = null + esm2_t36_3B_UR50D = null + esm2_t36_3B_UR50D_contact_regression = null + + // Esmfold paths + esmfold_params_path = null + + // Rosettafold-all-atom parameters + rosettafold_all_atom_db = null + uniref30_rosettafold_all_atom_path = null + blast_path = null + pdb100_path = null + RFAA_paper_weights_path = null + + // Process skipping options + skip_multiqc = false // MultiQC options multiqc_config = null @@ -126,6 +149,7 @@ params { hook_url = null help = false version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' validate_params = true show_hidden_params = false schema_ignore_params = 'genomes' @@ -144,6 +168,13 @@ params { max_cpus = 16 max_time = '240.h' + // 
Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = '' + validationShowHiddenParams = false + validate_params = true + } spack.enabled = true @@ -164,35 +195,47 @@ try { } catch (Exception e) { System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") } - - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true - if (params.use_gpu) { docker.runOptions = '--gpus all' } + if (params.use_gpu) { + docker.runOptions = '--gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g)' + } + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } arm { if (params.use_gpu) { @@ -205,48 +248,91 @@ profiles { singularity.enabled = true singularity.autoMounts = true if (params.use_gpu) { 
singularity.runOptions = '--nv' } + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 
'conf/test_colabfold_webserver.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } + test { includeConfig 'conf/test.config' } + test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } + test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } + test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } + test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } + test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } + test_esmfold { includeConfig 'conf/test_esmfold.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } + test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } + test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } + test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } + test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + 
id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -263,6 +349,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -287,8 +376,8 @@ manifest { homePage = 'https://github.com/nf-core/proteinfold' description = """Protein 3D structure prediction pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0.0' + nextflowVersion = '!>=23.04.0' + version = '1.1.1' doi = '10.5281/zenodo.7629996' } @@ -300,6 +389,10 @@ if (params.mode == 'alphafold2') { includeConfig 'conf/modules_alphafold2.config' } else if (params.mode == 'colabfold') { includeConfig 'conf/modules_colabfold.config' +} else if (params.mode == 'esmfold') { + includeConfig 'conf/modules_esmfold.config' +} else if (params.mode == 'rosettafold_all_atom') { + includeConfig 'conf/modules_rosettafold_all_atom.config' } // Load links to DBs and parameters diff --git a/nextflow_schema.json b/nextflow_schema.json index dbe9832c..b7fa00f8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,10 @@ "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. 
It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -32,7 +33,7 @@ "type": "string", "default": "alphafold2", "description": "Specifies the mode in which the pipeline will be run", - "enum": ["alphafold2", "colabfold"], + "enum": ["alphafold2", "colabfold", "esmfold"], "fa_icon": "fas fa-cogs" }, "use_gpu": { @@ -68,13 +69,15 @@ }, "alphafold2_db": { "type": "string", + "format": "path", + "exists": true, "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode", "fa_icon": "fas fa-database" }, "full_dbs": { "type": "boolean", "default": false, - "description": "If true uses full DBs otherwise, otherwise it uses the reduced version of DBs", + "description": "If true uses the full version of the BFD database otherwise, otherwise it uses its reduced version, small bfd", "fa_icon": "fas fa-battery-full" }, "alphafold2_mode": { @@ -86,7 +89,7 @@ }, "alphafold2_model_preset": { "type": "string", - "default": "monomer_ptm", + "default": "monomer", "description": "Model preset for 'AlphaFold2' mode", "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], "fa_icon": "fas fa-stream" @@ -101,6 +104,8 @@ "properties": { "colabfold_db": { "type": "string", + "format": "path", + "exists": true, "description": "Specifies the PARAMS and DB path used by 'colabfold' mode", "fa_icon": "fas fa-folder-open" }, @@ -113,15 +118,22 @@ }, "colabfold_model_preset": { "type": "string", - "default": "AlphaFold2-ptm", + "default": "alphafold2_ptm", "description": "Model preset for 'colabfold' mode", - "enum": ["AlphaFold2-ptm", "AlphaFold2-multimer-v1", "AlphaFold2-multimer-v2"], + "enum": [ + "auto", + "alphafold2", + "alphafold2_ptm", + "alphafold2_multimer_v1", + "alphafold2_multimer_v2", + "alphafold2_multimer_v3" + ], "fa_icon": "fas fa-stream" }, - "num_recycle": { + "num_recycles_colabfold": { "type": "integer", "default": 3, - 
"description": "Number of recycles", + "description": "Number of recycles for Colabfold", "fa_icon": "fas fa-recycle" }, "use_amber": { @@ -155,6 +167,61 @@ } } }, + "esmfold_options": { + "title": "Esmfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Esmfold options.", + "properties": { + "esmfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS path used by 'esmfold' mode", + "fa_icon": "fas fa-folder-open" + }, + "num_recycles_esmfold": { + "type": "integer", + "default": 4, + "description": "Specifies the number of recycles used by Esmfold", + "fa_icon": "fas fa-server" + }, + "esmfold_model_preset": { + "type": "string", + "description": "Specifies whether is a 'monomer' or 'multimer' prediction", + "enum": ["monomer", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "rosettafold_all_atom_options": { + "title": "RoseTTAFold-all-atom options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "RoseTTAFold-all-atom options", + "properties": { + "rosettafold_all_atom_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS path used by 'rosettafold_all_atom' mode", + "fa_icon": "fas fa-folder-open" + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -232,7 +299,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": 
"Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -244,75 +311,75 @@ "fa_icon": "fas fa-database", "description": "Parameters used to provide the links to the DBs and parameters public resources to Alphafold2.", "properties": { - "bfd": { + "bfd_link": { "type": "string", "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz", "description": "Link to BFD dababase", "fa_icon": "fas fa-link" }, - "small_bfd": { + "small_bfd_link": { "type": "string", "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz", "description": "Link to a reduced version of the BFD dababase", "fa_icon": "fas fa-link" }, - "alphafold2_params": { + "alphafold2_params_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar", "description": "Link to the Alphafold2 parameters", "fa_icon": "fas fa-link" }, - "mgnify": { + "mgnify_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz", "description": "Link to the MGnify database", "fa_icon": "fas fa-link" }, - "pdb70": { + "pdb70_link": { "type": "string", "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz", "description": "Link to the PDB70 database", "fa_icon": "fas fa-link" }, - "pdb_mmcif": { + "pdb_mmcif_link": { "type": "string", "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/", "description": "Link to the PDB mmCIF database", "fa_icon": "fas fa-link" }, - "pdb_obsolete": { + 
"pdb_obsolete_link": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat", - "description": "Link to the PDV obsolete database", + "default": "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat", + "description": "Link to the PDB obsolete database", "fa_icon": "fas fa-link" }, - "uniclust30": { + "uniref30_alphafold2_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz", "description": "Link to the Uniclust30 database", "fa_icon": "fas fa-link" }, - "uniref90": { + "uniref90_link": { "type": "string", - "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", "description": "Link to the UniRef90 database", "fa_icon": "fas fa-link" }, - "pdb_seqres": { + "pdb_seqres_link": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", + "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", "description": "Link to the PDB SEQRES database", "fa_icon": "fas fa-link" }, - "uniprot_sprot": { + "uniprot_sprot_link": { "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", "description": "Link to the SwissProt UniProt database", "fa_icon": "fas fa-link" }, - "uniprot_trembl": { + "uniprot_trembl_link": { "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", "description": 
"Link to the TrEMBL UniProt database", "fa_icon": "fas fa-link" } @@ -354,9 +421,9 @@ "description": "Path to the PDB mmCIF database", "fa_icon": "fas fa-folder-open" }, - "uniclust30_path": { + "uniref30_alphafold2_path": { "type": "string", - "description": "Path to the Uniclust30 database", + "description": "Path to the Uniref30 database", "fa_icon": "fas fa-folder-open" }, "uniref90_path": { @@ -388,13 +455,13 @@ "description": "Link to the Colabfold database", "fa_icon": "fas fa-link" }, - "uniref30": { + "uniref30_colabfold_link": { "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2103.tar.gz", + "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz", "description": "Link to the UniRef30 database", "fa_icon": "fas fa-link" }, - "colabfold_alphafold2_params": { + "colabfold_alphafold2_params_link": { "type": "string", "description": "Link to the Alphafold2 parameters for Colabfold", "fa_icon": "fas fa-link" @@ -412,7 +479,7 @@ "description": "Link to the Colabfold database", "fa_icon": "fas fa-folder-open" }, - "uniref30_path": { + "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", "fa_icon": "fas fa-folder-open" @@ -423,12 +490,79 @@ "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_tags": { - "type": "object", + "type": "string", "description": "Dictionary with Alphafold2 parameters tags", "fa_icon": "fas fa-stream" } } }, + "esmfold_parameters_link_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_3B_v1": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt", + "description": "Link to the Esmfold 3B-v1 model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D": { + "type": "string", + "default": 
"https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt", + "description": "Link to the Esmfold t36-3B-UR50D model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D_contact_regression": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt", + "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model", + "fa_icon": "fas fa-link" + } + } + }, + "esmfold_parameters_path_options": { + "title": "Esmfold parameters path options", + "type": "object", + "description": "Parameters used to provide the paths to the Esmfold parameters.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_params_path": { + "type": "string", + "description": "Path to the Esmfold parameters", + "fa_icon": "fas fa-folder-open" + } + } + }, + "rosettafold_all_atom_dbs_and_parameters_path_options": { + "title": "RosettaFold-All-Atom DBs and parameters path options", + "type": "object", + "description": "Parameters used to provide paths to the databases and parameters for RosettaFold-All-Atom.", + "fa_icon": "fas fa-database", + "properties": { + "uniref30_rosettafold_all_atom_path": { + "type": "string", + "description": "Path to the UniRef30 database for RosettaFold-All-Atom", + "fa_icon": "fas fa-folder-open" + }, + "blast_path": { + "type": "string", + "description": "Path to the BLAST database for RosettaFold-All-Atom", + "fa_icon": "fas fa-folder-open" + }, + "pdb100_path": { + "type": "string", + "description": "Path to the PDB100 database for RosettaFold-All-Atom", + "fa_icon": "fas fa-folder-open" + }, + "RFAA_paper_weights_path": { + "type": "string", + "description": "Path to the weights file used in the RFAA paper for RosettaFold-All-Atom", + "fa_icon": "fas fa-folder-open" + } + } + }, "generic_options": { "title": "Generic options", "type": "object", @@ -494,28 +628,30 @@ }, "multiqc_config": { "type": "string", + "format": "file-path", + "exists": 
true, + "mimetype": "text/plain", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, "multiqc_logo": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", "hidden": true }, "multiqc_methods_description": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -523,12 +659,33 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. 
For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } @@ -543,6 +700,12 @@ { "$ref": "#/definitions/colabfold_options" }, + { + "$ref": "#/definitions/esmfold_options" + }, + { + "$ref": "#/definitions/process_skipping_options" + }, { "$ref": "#/definitions/institutional_config_options" }, @@ -561,6 +724,12 @@ { "$ref": "#/definitions/colabfold_dbs_and_parameters_path_options" }, + { + "$ref": "#/definitions/esmfold_parameters_link_options" + }, + { + "$ref": "#/definitions/esmfold_parameters_path_options" + }, { "$ref": "#/definitions/generic_options" } diff --git a/subworkflows/local/aria2_uncompress.nf b/subworkflows/local/aria2_uncompress.nf index b2e8a0ca..09a27ff0 100644 --- a/subworkflows/local/aria2_uncompress.nf +++ b/subworkflows/local/aria2_uncompress.nf @@ -13,14 +13,17 @@ workflow ARIA2_UNCOMPRESS { main: ARIA2 ( - source_url + [ + [:], + source_url + ] ) ch_db = Channel.empty() if (source_url.toString().endsWith('.tar') || source_url.toString().endsWith('.tar.gz')) { - ch_db = UNTAR ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).untar.map{ it[1] } + ch_db = UNTAR ( ARIA2.out.downloaded_file ).untar.map{ it[1] } } else if (source_url.toString().endsWith('.gz')) { - ch_db = GUNZIP ( ARIA2.out.downloaded_file.flatten().map{ [ [:], it ] } ).gunzip.map { it[1] } + ch_db = GUNZIP ( ARIA2.out.downloaded_file ).gunzip.map { it[1] } } emit: diff --git a/subworkflows/local/prepare_alphafold2_dbs.nf b/subworkflows/local/prepare_alphafold2_dbs.nf index 20adc42e..4621af6b 100644 --- a/subworkflows/local/prepare_alphafold2_dbs.nf +++ b/subworkflows/local/prepare_alphafold2_dbs.nf @@ -8,106 +8,138 @@ include { ARIA2_UNCOMPRESS as 
ARIA2_SMALL_BFD ARIA2_UNCOMPRESS as ARIA2_MGNIFY ARIA2_UNCOMPRESS as ARIA2_PDB70 - ARIA2_UNCOMPRESS as ARIA2_UNICLUST30 + ARIA2_UNCOMPRESS as ARIA2_UNIREF30 ARIA2_UNCOMPRESS as ARIA2_UNIREF90 ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL } from './aria2_uncompress' -include { ARIA2 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_PDB_SEQRES } from '../../modules/nf-core/aria2/main' + include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' workflow PREPARE_ALPHAFOLD2_DBS { + + take: + alphafold2_db // directory: path to alphafold2 DBs + full_dbs // boolean: Use full databases (otherwise reduced version) + bfd_path // directory: /path/to/bfd/ + small_bfd_path // directory: /path/to/small_bfd/ + alphafold2_params_path // directory: /path/to/alphafold2/params/ + mgnify_path // directory: /path/to/mgnify/ + pdb70_path // directory: /path/to/pdb70/ + pdb_mmcif_path // directory: /path/to/pdb_mmcif/ + uniref30_alphafold2_path // directory: /path/to/uniref30/alphafold2/ + uniref90_path // directory: /path/to/uniref90/ + pdb_seqres_path // directory: /path/to/pdb_seqres/ + uniprot_path // directory: /path/to/uniprot/ + bfd_link // string: Specifies the link to download bfd + small_bfd_link // string: Specifies the link to download small_bfd + alphafold2_params_link // string: Specifies the link to download alphafold2_params + mgnify_link // string: Specifies the link to download mgnify + pdb70_link // string: Specifies the link to download pdb70 + pdb_mmcif_link // string: Specifies the link to download pdb_mmcif + pdb_obsolete_link // string: Specifies the link to download pdb_obsolete + uniref30_alphafold2_link // string: Specifies the link to download uniref30_alphafold2 + uniref90_link // string: Specifies the link to download uniref90 + pdb_seqres_link // string: Specifies the link to download pdb_seqres + uniprot_sprot_link // 
string: Specifies the link to download uniprot_sprot + uniprot_trembl_link // string: Specifies the link to download uniprot_trembl + main: ch_bfd = Channel.empty() ch_small_bfd = Channel.empty() ch_versions = Channel.empty() - if (params.alphafold2_db) { - if (params.full_dbs) { - ch_bfd = file( params.bfd_path ) - ch_small_bfd = file( "${projectDir}/assets/dummy_db" ) + if (alphafold2_db) { + if (full_dbs) { + ch_bfd = Channel.value(file(bfd_path)) + ch_small_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) } else { - ch_bfd = file( "${projectDir}/assets/dummy_db" ) - ch_small_bfd = file( params.small_bfd_path ) + ch_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) + ch_small_bfd = Channel.value(file(small_bfd_path)) } - ch_params = file( params.alphafold2_params_path ) - ch_mgnify = file( params.mgnify_path ) - ch_pdb70 = file( params.pdb70_path, type: 'any' ) - ch_mmcif = file( params.pdb_mmcif_path, type: 'any' ) - ch_uniclust30 = file( params.uniclust30_path, type: 'any' ) - ch_uniref90 = file( params.uniref90_path ) - ch_pdb_seqres = file( params.pdb_seqres_path ) - ch_uniprot = file( params.uniprot_path ) + ch_params = Channel.value(file(alphafold2_params_path)) + ch_mgnify = Channel.value(file(mgnify_path)) + ch_pdb70 = Channel.value(file(pdb70_path, type: 'dir' )) + ch_mmcif_files = file(pdb_mmcif_path, type: 'dir') + ch_mmcif_obsolete = file(pdb_mmcif_path, type: 'file') + ch_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_uniref30 = Channel.value(file(uniref30_alphafold2_path, type: 'any')) + ch_uniref90 = Channel.value(file(uniref90_path)) + ch_pdb_seqres = Channel.value(file(pdb_seqres_path)) + ch_uniprot = Channel.value(file(uniprot_path)) } else { - if (params.full_dbs) { + if (full_dbs) { ARIA2_BFD( - params.bfd + bfd_link ) ch_bfd = ARIA2_BFD.out.db ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) } else { ARIA2_SMALL_BFD( - params.small_bfd + small_bfd_link ) ch_small_bfd = ARIA2_SMALL_BFD.out.db 
ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) } ARIA2_ALPHAFOLD2_PARAMS( - params.alphafold2_params + alphafold2_params_link ) ch_params = ARIA2_ALPHAFOLD2_PARAMS.out.db ch_versions = ch_versions.mix(ARIA2_ALPHAFOLD2_PARAMS.out.versions) ARIA2_MGNIFY( - params.mgnify + mgnify_link ) ch_mgnify = ARIA2_MGNIFY.out.db ch_versions = ch_versions.mix(ARIA2_MGNIFY.out.versions) - ARIA2_PDB70( - params.pdb70 + pdb70_link ) ch_pdb70 = ARIA2_PDB70.out.db ch_versions = ch_versions.mix(ARIA2_PDB70.out.versions) DOWNLOAD_PDBMMCIF( - params.pdb_mmcif, - params.pdb_obsolete + pdb_mmcif_link, + pdb_obsolete_link ) ch_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) - ARIA2_UNICLUST30( - params.uniclust30 + ARIA2_UNIREF30( + uniref30_alphafold2_link ) - ch_uniclust30 = ARIA2_UNICLUST30.out.db - ch_versions = ch_versions.mix(ARIA2_UNICLUST30.out.versions) + ch_uniref30 = ARIA2_UNIREF30.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) ARIA2_UNIREF90( - params.uniref90 + uniref90_link ) ch_uniref90 = ARIA2_UNIREF90.out.db ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) - ARIA2 ( - params.pdb_seqres + ARIA2_PDB_SEQRES ( + [ + [:], + pdb_seqres_link + ] ) - ch_pdb_seqres = ARIA2.out.downloaded_file - ch_versions = ch_versions.mix(ARIA2.out.versions) + ch_pdb_seqres = ARIA2_PDB_SEQRES.out.downloaded_file.map{ it[1] } + ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) ARIA2_UNIPROT_SPROT( - params.uniprot_sprot + uniprot_sprot_link ) ch_versions = ch_versions.mix(ARIA2_UNIPROT_SPROT.out.versions) ARIA2_UNIPROT_TREMBL( - params.uniprot_trembl + uniprot_trembl_link ) ch_versions = ch_versions.mix(ARIA2_UNIPROT_TREMBL.out.versions) COMBINE_UNIPROT ( @@ -118,14 +150,14 @@ workflow PREPARE_ALPHAFOLD2_DBS { ch_version = ch_versions.mix(COMBINE_UNIPROT.out.versions) } - emit: + emit: bfd = ch_bfd small_bfd = ch_small_bfd params = ch_params mgnify = ch_mgnify pdb70 = ch_pdb70 pdb_mmcif = 
ch_mmcif - uniclust30 = ch_uniclust30 + uniref30 = ch_uniref30 uniref90 = ch_uniref90 pdb_seqres = ch_pdb_seqres uniprot = ch_uniprot diff --git a/subworkflows/local/prepare_colabfold_dbs.nf b/subworkflows/local/prepare_colabfold_dbs.nf index ef1f45e4..bab0b74c 100644 --- a/subworkflows/local/prepare_colabfold_dbs.nf +++ b/subworkflows/local/prepare_colabfold_dbs.nf @@ -11,29 +11,41 @@ include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_COLABFOLDDB } from '. include { MMSEQS_TSV2EXPROFILEDB as MMSEQS_TSV2EXPROFILEDB_UNIPROT30 } from '../../modules/nf-core/mmseqs/tsv2exprofiledb/main' workflow PREPARE_COLABFOLD_DBS { - main: + + take: + colabfold_db // directory: path/to/colabfold/DBs and params + colabfold_server // string: Specifies the server to use for colabfold + colabfold_alphafold2_params_path // directory: /path/to/colabfold/alphafold2/params/ + colabfold_db_path // directory: /path/to/colabfold/db/ + uniref30_colabfold_path // directory: /path/to/uniref30/colabfold/ + colabfold_alphafold2_params_link // string: Specifies the link to download colabfold alphafold2 params + colabfold_db_link // string: Specifies the link to download colabfold db + uniref30_colabfold_link // string: Specifies the link to download uniref30 + create_colabfold_index // boolean: Create index for colabfold db + + main: ch_params = Channel.empty() ch_colabfold_db = Channel.empty() ch_uniref30 = Channel.empty() ch_versions = Channel.empty() - if (params.colabfold_db) { - ch_params = file( params.colabfold_alphafold2_params_path, type: 'any' ) - if (params.colabfold_server == 'local') { - ch_colabfold_db = file( params.colabfold_db_path, type: 'any' ) - ch_uniref30 = file( params.uniref30_path , type: 'any' ) + if (colabfold_db) { + ch_params = Channel.value(file( colabfold_alphafold2_params_path, type: 'any' )) + if (colabfold_server == 'local') { + ch_colabfold_db = Channel.value(file( colabfold_db_path, type: 'any' )) + ch_uniref30 = Channel.value(file( 
uniref30_colabfold_path , type: 'any' )) } } else { ARIA2_COLABFOLD_PARAMS ( - params.colabfold_alphafold2_params + colabfold_alphafold2_params_link ) ch_params = ARIA2_COLABFOLD_PARAMS.out.db ch_versions = ch_versions.mix(ARIA2_COLABFOLD_PARAMS.out.versions) if (params.colabfold_server == 'local') { ARIA2_COLABFOLD_DB ( - params.colabfold_db_link + colabfold_db_link ) ch_versions = ch_versions.mix(ARIA2_COLABFOLD_DB.out.versions) @@ -52,7 +64,7 @@ workflow PREPARE_COLABFOLD_DBS { } ARIA2_UNIREF30( - params.uniref30 + uniref30_colabfold_link ) ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) @@ -62,7 +74,7 @@ workflow PREPARE_COLABFOLD_DBS { ch_uniref30 = MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile ch_versions = ch_versions.mix(MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.versions) - if (params.create_colabfold_index) { + if (create_colabfold_index) { MMSEQS_CREATEINDEX_UNIPROT30 ( MMSEQS_TSV2EXPROFILEDB_UNIPROT30.out.db_exprofile ) @@ -72,7 +84,7 @@ workflow PREPARE_COLABFOLD_DBS { } } - emit: + emit: params = ch_params colabfold_db = ch_colabfold_db uniref30 = ch_uniref30 diff --git a/subworkflows/local/prepare_esmfold_dbs.nf b/subworkflows/local/prepare_esmfold_dbs.nf new file mode 100644 index 00000000..decd2875 --- /dev/null +++ b/subworkflows/local/prepare_esmfold_dbs.nf @@ -0,0 +1,64 @@ +// +// Download all the required Esmfold parameters +// + +include { ARIA2 as ARIA2_ESMFOLD_3B_V1 } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D } from '../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION } from '../../modules/nf-core/aria2/main' + +workflow PREPARE_ESMFOLD_DBS { + + take: + esmfold_db // directory: /path/to/esmfold/db/ + esmfold_params_path // directory: /path/to/esmfold/params/ + esmfold_3B_v1 // string: Specifies the link to download esmfold 3B v1 + esm2_t36_3B_UR50D // string: Specifies the link to download esm2 t36 3B UR50D + esm2_t36_3B_UR50D_contact_regression 
// string: Specifies the link to download esm2 t36 3B UR50D contact regression + + main: + ch_versions = Channel.empty() + + if (esmfold_db) { + ch_params = Channel.value(file( esmfold_params_path, type: 'file' )) + } + else { + ARIA2_ESMFOLD_3B_V1 ( + [ + [:], + esmfold_3B_v1 + ] + ) + ARIA2_ESM2_T36_3B_UR50D ( + [ + [:], + esm2_t36_3B_UR50D + ] + ) + ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION ( + [ + [:], + esm2_t36_3B_UR50D_contact_regression + ] + ) + ch_params = ARIA2_ESMFOLD_3B_V1 + .out + .downloaded_file + .map{ it[1] } + .mix( + ARIA2_ESM2_T36_3B_UR50D + .out + .downloaded_file + .map{ it[1] }, + ARIA2_ESM2_T36_3B_UR50D_CONTACT_REGRESSION + .out + .downloaded_file + .map{ it[1] }) + .collect() + + ch_versions = ch_versions.mix(ARIA2_ESMFOLD_3B_V1.out.versions) + } + + emit: + params = ch_params + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf new file mode 100644 index 00000000..742d460a --- /dev/null +++ b/subworkflows/local/utils_nfcore_proteinfold_pipeline/main.nf @@ -0,0 +1,206 @@ +// +// Subworkflow with functionality specific to the nf-core/proteinfold pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from 
'../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email 
address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Get link to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2Params() { + def link = null + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + link = "https://storage.googleapis.com/alphafold/" + params.colabfold_alphafold2_params_tags[ params.colabfold_model_preset.toString() ] + '.tar' + } + } + return link +} + +// +// Get path to Colabfold Alphafold2 parameters +// +def getColabfoldAlphafold2ParamsPath() { + def path = null + params.colabfold_model_preset.toString() + if (params.colabfold_alphafold2_params_tags) { + if (params.colabfold_alphafold2_params_tags.containsKey(params.colabfold_model_preset.toString())) { + path = "${params.colabfold_db}/params/" + params.colabfold_alphafold2_params_tags[ 
params.colabfold_model_preset.toString() ] + } + } + return path +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = 
true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    $group

    \n" + summary_section += "
    \n" + for (param in group_params.keySet()) { + summary_section += "
    $param
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSI colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ?
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m"
+    colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m"
+
+    // Bold High Intensity
+    colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m"
+    colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m"
+    colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m"
+    colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m"
+    colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m"
+    colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m"
+    colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m"
+    colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m"
+
+    return colorcodes
+}
+
+// Resolve the MultiQC report to attach to the completion e-mail. On a successful run the value of
+// `multiqc_report` is read with getVal(); if that is a non-empty ArrayList only its first element is used.
+// Returns null when the run failed; a lookup error is logged as a warning only if `multiqc_report` is set.
+def attachMultiqcReport(multiqc_report) {
+    def mqc_report = null
+    try {
+        if (workflow.success) {
+            mqc_report = multiqc_report.getVal()
+            if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
+                if (mqc_report.size() > 1) {
+                    log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+                }
+                mqc_report = mqc_report[0]
+            }
+        }
+    } catch (all) {
+        if (multiqc_report) {
+            log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
+        }
+    }
+    return mqc_report
+}
+
+// Build and send the completion e-mail (sendmail first, `mail` as fallback), then write the rendered HTML and
+// TXT reports to ${outdir}/pipeline_info/. `email` is the normal recipient; `email_on_fail` is used only when
+// `email` is unset and the run failed. A truthy `plaintext_email` skips sendmail and goes straight to `mail`.
+def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) {
+
+    // Set up the e-mail variables
+    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+    if (!workflow.success) {
+        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+    }
+
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['Date Started'] = workflow.start
+    misc_fields['Date Completed'] = workflow.complete
+    misc_fields['Pipeline script file path'] = workflow.scriptFile
+    misc_fields['Pipeline script hash ID'] = workflow.scriptId
+    if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
+    if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+    if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
+    misc_fields['Nextflow Version'] = workflow.nextflow.version
+    misc_fields['Nextflow Build'] = workflow.nextflow.build
+    misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+    def email_fields = [:]
+    email_fields['version'] = getWorkflowVersion()
+    email_fields['runName'] = workflow.runName
+    email_fields['success'] = workflow.success
+    email_fields['dateComplete'] = workflow.complete
+    email_fields['duration'] = workflow.duration
+    email_fields['exitStatus'] = workflow.exitStatus
+    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
+    email_fields['commandLine'] = workflow.commandLine
+    email_fields['projectDir'] = workflow.projectDir
+    email_fields['summary'] = summary << misc_fields
+
+    // On success try attach the multiqc report
+    def mqc_report = attachMultiqcReport(multiqc_report)
+
+    // Check if we are only sending emails on failure
+    def email_address = email
+    if (!email && email_on_fail && !workflow.success) {
+        email_address = email_on_fail
+    }
+
+    // Render the TXT template
+    def engine = new groovy.text.GStringTemplateEngine()
+    def tf = new File("${workflow.projectDir}/assets/email_template.txt")
+    def txt_template = engine.createTemplate(tf).make(email_fields)
+    def email_txt = txt_template.toString()
+
+    // Render the HTML template
+    def hf = new File("${workflow.projectDir}/assets/email_template.html")
+    def html_template = engine.createTemplate(hf).make(email_fields)
+    def email_html = html_template.toString()
+
+    // Render the sendmail template
+    def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
+    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
+    def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt")
+    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
+    def sendmail_html = sendmail_template.toString()
+
+    // Send the HTML e-mail
+    Map colors = logColours(monochrome_logs)
+    if (email_address) {
+        try {
+            if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') }
+            // Try to send HTML e-mail using sendmail
+            def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+            sendmail_tf.withWriter { w -> w << sendmail_html }
+            [ 'sendmail', '-t' ].execute() << sendmail_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+        } catch (all) {
+            // sendmail failed or plaintext_email was requested: fall back to `mail` (this still sends the HTML body)
+            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+            mail_cmd.execute() << email_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+        }
+    }
+
+    // Write summary e-mail HTML to a file
+    def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
+    output_hf.withWriter { w -> w << email_html }
+    FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html");
+    output_hf.delete()
+
+    // Write summary e-mail TXT to a file
+    def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
+    output_tf.withWriter { w -> w << email_txt }
+    FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt");
+    output_tf.delete()
+}
+
+// Log a one-line completion status: green when the run succeeded with no ignored tasks, yellow when it
+// succeeded but workflow.stats.ignoredCount is non-zero, red when the run failed.
+// `monochrome_logs` is forwarded to logColours() to obtain the colour codes used in the message.
+def completionSummary(monochrome_logs=true) {
+    Map colors = logColours(monochrome_logs)
+    if (workflow.success) {
+        if (workflow.stats.ignoredCount == 0) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+        }
+    } else {
+        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+    }
+}
+
+// POST a JSON run summary to a chat webhook. URLs containing "hooks.slack.com" use assets/slackreport.json,
+// every other URL uses assets/adaptivecard.json. The entries of each group in `summary_params` are merged into
+// one flat summary map, and the `--hook_url <value>` argument is stripped from the reported command line.
+def imNotification(summary_params, hook_url) {
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['start'] = workflow.start
+    misc_fields['complete'] = workflow.complete
+    misc_fields['scriptfile'] = workflow.scriptFile
+    misc_fields['scriptid'] = workflow.scriptId
+    if (workflow.repository) misc_fields['repository'] = workflow.repository
+    if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
+    if (workflow.revision) misc_fields['revision'] = workflow.revision
+    misc_fields['nxf_version'] = workflow.nextflow.version
+    misc_fields['nxf_build'] = workflow.nextflow.build
+    misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+    def msg_fields = [:]
+    msg_fields['version'] = getWorkflowVersion()
+    msg_fields['runName'] = workflow.runName
+    msg_fields['success'] = workflow.success
+    msg_fields['dateComplete'] = workflow.complete
+    msg_fields['duration'] = workflow.duration
+    msg_fields['exitStatus'] = workflow.exitStatus
+    msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
+    msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
+    msg_fields['projectDir'] = workflow.projectDir
+    msg_fields['summary'] = summary << misc_fields
+
+    // Render the JSON template
+    def engine = new groovy.text.GStringTemplateEngine()
+    // Different JSON depending on the service provider
+    // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+    def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+    def hf = new File("${workflow.projectDir}/assets/${json_path}")
+    def json_template = engine.createTemplate(hf).make(msg_fields)
+    def json_message = json_template.toString()
+
+    // POST
+    def post = new URL(hook_url).openConnection();
+    post.setRequestMethod("POST")
+    post.setDoOutput(true)
+    post.setRequestProperty("Content-Type", "application/json")
+    post.getOutputStream().write(json_message.getBytes("UTF-8"));
+    def postRC = post.getResponseCode();
+    if (postRC < 200 || postRC >= 300) {
+        log.warn(post.getErrorStream()?.getText() ?: "[$workflow.manifest.name] Webhook notification failed with HTTP status ${postRC}");
+    }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
new file mode 100644
index 00000000..d08d2434
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
@@ -0,0 +1,24 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFCORE_PIPELINE"
+description: Subworkflow with utility functions specific to the nf-core pipeline template
+keywords:
+  - utility
+  - pipeline
+  - initialise
+  - version
+components: []
+input:
+  - nextflow_cli_args:
+      type: list
+      description: |
+        Nextflow CLI positional arguments
+output:
+  - success:
+      type: boolean
+      description: |
+        Dummy output to indicate success
+authors:
+  - "@adamrtalbot"
+maintainers:
+  - "@adamrtalbot"
+  - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
new file mode 100644
index 00000000..1dc317f8
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
@@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + 
"green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": 
"\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config 
b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help 
text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + 
"description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 67a53a60..9a1aebae 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -1,35 +1,3 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowAlphafold2.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, - params.alphafold2_db -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: 
true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input file not specified!' } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -37,13 +5,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_ALPHAFOLD2_DBS } from '../subworkflows/local/prepare_alphafold2_dbs' - -// -// MODULE: Local to the pipeline +// MODULE: Loaded from modules/local/ // include { RUN_ALPHAFOLD2 } from '../modules/local/run_alphafold2' include { RUN_ALPHAFOLD2_MSA } from '../modules/local/run_alphafold2_msa' @@ -58,8 +20,16 @@ include { RUN_ALPHAFOLD2_PRED } from '../modules/local/run_alphafold2_pred' // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,151 +37,146 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow ALPHAFOLD2 { - ch_versions = Channel.empty() + take: + ch_versions // channel: [ path(versions.yml) ] + full_dbs // boolean: Use full databases (otherwise reduced version) + alphafold2_mode // string: Mode to run Alphafold2 in + alphafold2_model_preset // string: Specifies the model preset to use for Alphafold2 + ch_alphafold2_params // 
channel: path(alphafold2_params) + ch_bfd // channel: path(bfd) + ch_small_bfd // channel: path(small_bfd) + ch_mgnify // channel: path(mgnify) + ch_pdb70 // channel: path(pdb70) + ch_pdb_mmcif // channel: path(pdb_mmcif) + ch_uniref30 // channel: path(uniref30) + ch_uniref90 // channel: path(uniref90) + ch_pdb_seqres // channel: path(pdb_seqres) + ch_uniprot // channel: path(uniprot) + + main: + ch_multiqc_files = Channel.empty() // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files + // Create input channel from input file provided through params.input // - if (params.alphafold2_model_preset != 'multimer') { - INPUT_CHECK ( - ch_input - ) - .fastas - .map { - meta, fasta -> - [ meta, fasta.splitFasta(file:true) ] - } - .transpose() + Channel + .fromSamplesheet("input") .set { ch_fasta } - } else { - INPUT_CHECK ( - ch_input - ) - .fastas - .set { ch_fasta } - } - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // - // SUBWORKFLOW: Download databases and params for Alphafold2 - // - PREPARE_ALPHAFOLD2_DBS ( ) - ch_versions = ch_versions.mix(PREPARE_ALPHAFOLD2_DBS.out.versions) + if (alphafold2_model_preset != 'multimer') { + ch_fasta + .map { + meta, fasta -> + [ meta, fasta.splitFasta(file:true) ] + } + .transpose() + .set { ch_fasta } + } - if (params.alphafold2_mode == 'standard') { + if (alphafold2_mode == 'standard') { // // SUBWORKFLOW: Run Alphafold2 standard mode // RUN_ALPHAFOLD2 ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + 
ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.collect() - } else if (params.alphafold2_mode == 'split_msa_prediction') { + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) + + } else if (alphafold2_mode == 'split_msa_prediction') { // // SUBWORKFLOW: Run Alphafold2 split mode, MSA and predicition // RUN_ALPHAFOLD2_MSA ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot - + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_MSA.out.versions) RUN_ALPHAFOLD2_PRED ( ch_fasta, - params.full_dbs, - params.alphafold2_model_preset, - PREPARE_ALPHAFOLD2_DBS.out.params, - PREPARE_ALPHAFOLD2_DBS.out.bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ALPHAFOLD2_DBS.out.mgnify, - PREPARE_ALPHAFOLD2_DBS.out.pdb70, - PREPARE_ALPHAFOLD2_DBS.out.pdb_mmcif, - PREPARE_ALPHAFOLD2_DBS.out.uniclust30, - PREPARE_ALPHAFOLD2_DBS.out.uniref90, - PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, - PREPARE_ALPHAFOLD2_DBS.out.uniprot, + full_dbs, + alphafold2_model_preset, + ch_alphafold2_params, + ch_bfd, + ch_small_bfd, + ch_mgnify, + ch_pdb70, + ch_pdb_mmcif, + ch_uniref30, + ch_uniref90, + ch_pdb_seqres, + ch_uniprot, 
RUN_ALPHAFOLD2_MSA.out.features - ) - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.collect() + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } // - // MODULE: Pipeline reporting + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowAlphafold2.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowAlphafold2.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, 
params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* diff --git a/workflows/rosettafold-all-atom.nf b/workflows/rosettafold_all_atom.nf similarity index 63% rename from workflows/rosettafold-all-atom.nf rename to workflows/rosettafold_all_atom.nf index 87464bee..28021c75 100644 --- 
a/workflows/rosettafold-all-atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -1,4 +1,4 @@ -## Currently just based on the AF2 .nf workflow, requires modification. +// Currently just based on the AF2 .nf workflow, requires modification. /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -14,7 +14,7 @@ WorkflowRosettafold-All-Atom.initialise(params, log) // Check input path parameters to see if they exist def checkPathParamList = [ params.input, - params.rosettafold-all-atom_db + params.rosettafold_all_atom_db ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -41,13 +41,13 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/rfaa_input_check' ## Doesn't exist, RFAA takes different inputs than AF2 -include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from '../subworkflows/local/prepare_rosettafold-all-atom_dbs' ## Doesn't exist +include { INPUT_CHECK } from '../subworkflows/local/rfaa_input_check' // Doesn't exist, RFAA takes different inputs than AF2 +include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from '../subworkflows/local/prepare_rosettafold_all_atom_dbs' // Doesn't exist // // MODULE: Local to the pipeline // -include { RUN_ROSETTAFOLD-ALL-ATOM } from '../modules/local/run_rosettafold-all-atom' +include { RUN_ROSETTAFOLD-ALL-ATOM } from '../modules/local/run_rosettafold_all_atom' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -74,80 +74,12 @@ workflow ROSETTAFOLD-ALL-ATOM { ch_versions = Channel.empty() - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - if (params.rosettafold-all-atom_model_preset != 'multimer') { - INPUT_CHECK ( - ch_input - ) - .fastas - .map { - meta, fasta -> - [ meta, fasta.splitFasta(file:true) ] - } - .transpose() - .set { ch_fasta } 
- } else { - INPUT_CHECK ( - ch_input - ) - .fastas - .set { ch_fasta } - } - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // // SUBWORKFLOW: Download databases and params for Rosettafold-All-Atom // PREPARE_ROSETTAFOLD-ALL-ATOM_DBS ( ) ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.versions) - if (params.rosettafold-all-atom_mode == 'standard') { - // - // SUBWORKFLOW: Run Rosettafold-All-Atom standard mode - // - RUN_ROSETTAFOLD-ALL-ATOM ( - ch_fasta, - params.full_dbs, - params.rosettafold-all-atom_model_preset, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.params, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.bfd.ifEmpty([]), - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.mgnify, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb70, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_mmcif, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniclust30, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniref90, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_seqres, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniprot, - ) - ch_versions = ch_versions.mix(RUN_ROSETTAFOLD-ALL-ATOM.out.versions) - ch_multiqc_rep = RUN_ROSETTAFOLD-ALL-ATOM.out.multiqc.collect() - } else if (params.rosettafold-all-atom_mode == 'split_msa_prediction') { - // - // SUBWORKFLOW: Run Rosettafold-All-Atom split mode, MSA and prediction - // - RUN_ROSETTAFOLD-ALL-ATOM_MSA ( - ch_fasta, - params.full_dbs, - params.rosettafold-all-atom_model_preset, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.params, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.bfd.ifEmpty([]), - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.small_bfd.ifEmpty([]), - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.mgnify, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb70, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_mmcif, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniclust30, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniref90, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.pdb_seqres, - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.uniprot - - ) - ch_versions = 
ch_versions.mix(RUN_ROSETTAFOLD-ALL-ATOM_MSA.out.versions) - - } - // // MODULE: Pipeline reporting // From 4de580a1266c704da5b0f60cb2f978099a838295 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 21 Oct 2024 15:30:42 +1100 Subject: [PATCH 049/135] Adjusted naming to snake_case for compatibility, various minor changes to scripts to get RFAA working modified: conf/dbs.config renamed: conf/modules_rosettafold-all-atom.config -> conf/modules_rosettafold_all_atom.config modified: main.nf renamed: modules/local/run_rosettafold-all-atom.nf -> modules/local/run_rosettafold_all_atom.nf new file: subworkflows/local/prepare_rosettafold_all_atom_dbs.nf modified: workflows/rosettafold_all_atom.nf --- conf/dbs.config | 3 +- ...ig => modules_rosettafold_all_atom.config} | 0 main.nf | 2 +- ...ll-atom.nf => run_rosettafold_all_atom.nf} | 33 ++-- .../local/prepare_rosettafold_all_atom_dbs.nf | 61 +++++++ workflows/rosettafold_all_atom.nf | 154 ++++++++---------- 6 files changed, 141 insertions(+), 112 deletions(-) rename conf/{modules_rosettafold-all-atom.config => modules_rosettafold_all_atom.config} (100%) rename modules/local/{run_rosettafold-all-atom.nf => run_rosettafold_all_atom.nf} (50%) create mode 100644 subworkflows/local/prepare_rosettafold_all_atom_dbs.nf diff --git a/conf/dbs.config b/conf/dbs.config index 0166f40d..5a54a916 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -108,7 +108,8 @@ params { // RoseTTAFold paths uniref30_variable = "${params.rosettafold_all_atom_db}/uniref30/" - pdb100_variable = "${params.rosettafold_all_atom_db}/pdb100/" + pdb100_path = "${params.rosettafold_all_atom_db}/pdb100/" bfd_variable = "${params.rosettafold_all_atom_db}/bfd/" RFAA_paper_weights_variable = "" + blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" } diff --git a/conf/modules_rosettafold-all-atom.config b/conf/modules_rosettafold_all_atom.config similarity index 100% rename from conf/modules_rosettafold-all-atom.config rename to 
conf/modules_rosettafold_all_atom.config diff --git a/main.nf b/main.nf index 683b3218..34c02934 100644 --- a/main.nf +++ b/main.nf @@ -28,7 +28,7 @@ if (params.mode == "alphafold2") { include { ESMFOLD } from './workflows/esmfold' } else if (params.mode == "rosettafold_all_atom") { include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' - include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold' + include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' } include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' diff --git a/modules/local/run_rosettafold-all-atom.nf b/modules/local/run_rosettafold_all_atom.nf similarity index 50% rename from modules/local/run_rosettafold-all-atom.nf rename to modules/local/run_rosettafold_all_atom.nf index ac9ca4be..9e288d33 100644 --- a/modules/local/run_rosettafold-all-atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -1,16 +1,16 @@ /* - * Run RoseTTAFold-All-Atom + * Run RoseTTAFold_All_Atom */ -process RUN_ROSETTAFOLD-ALL-ATOM { +process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" label 'process_medium' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ROSETTAFOLD-ALL-ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") + error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "RoseTTAFold-All-Atom.sif" + container "RoseTTAFold_All_Atom.sif" input: tuple val(meta), path(file) @@ -23,34 +23,23 @@ process RUN_ROSETTAFOLD-ALL-ATOM { when: task.ext.when == null || task.ext.when -### Need to modify the DB variables to match dbs.config script: + """ apptainer run --nv -B /mnt/af2,/srv \ --env blast_path="${params.blast_path}" \ --env bfd_path="${params.bfd_path}" \ - --env uniref30_path="${params.uniref30_path}" \ - --env pdb100="${params.pdb100}" \ - RoseTTAFold-All-Atom-dev.sif "$file" - } -# cp "${file.baseName}"/ranked_0.pdb ./"${file.baseName}".rosettafold_all_atom.pdb -# cd "${file.baseName}" -# awk '{print \$6"\\t"\$11}' ranked_0.pdb | uniq > ranked_0_plddt.tsv -# for i in 1 2 3 4 -# do awk '{print \$6"\\t"\$11}' ranked_\$i.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv -# done -# paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv -# echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv -# cat header.tsv plddt.tsv > ../"${file.baseName}"_plddt_mqc.tsv -# cd .. 
- - cat <<-END_VERSIONS > versions.yml + --env uniref30_path="${params.uniref30_variable}" \ + --env pdb100="${params.pdb100_path}" \ + RoseTTAFold_All_Atom.sif "$file" + + cat <<-END_VERSIONS > versions.yaml "${task.process}": python: \$(python3 --version | sed 's/Python //g') END_VERSIONS """ stub: - "" + """ touch ./"${file.baseName}".rosettafold_all_atom.pdb touch ./"${file.baseName}"_mqc.tsv diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf new file mode 100644 index 00000000..36f02e9d --- /dev/null +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -0,0 +1,61 @@ +// +// Download all the required Rosettafold-All-Atom databases and parameters +// + +include { + ARIA2_UNCOMPRESS as ARIA2_BFD + ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD + ARIA2_UNCOMPRESS as ARIA2_UNIREF30} from './aria2_uncompress' + +workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { + + take: + rosettafold_all_atom_db // directory: path to rosettafold_all_atom DBs + bfd_path // directory: /path/to/bfd/ + small_bfd_path // directory: /path/to/small_bfd/ + uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ + bfd_link // string: Specifies the link to download bfd + small_bfd_link // string: Specifies the link to download small_bfd + uniref30_rosettafold_all_atom_link // string: Specifies the link to download uniref30_rosettafold_all_atom + + main: + ch_bfd = Channel.empty() + ch_small_bfd = Channel.empty() + ch_versions = Channel.empty() + + + if (rosettafold_all_atom_db) { + ch_bfd = Channel.value(file(bfd_path)) + ch_small_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) + ch_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) + ch_small_bfd = Channel.value(file(small_bfd_path)) + ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path, type: 'any')) + } + else { + if (full_dbs) { + ARIA2_BFD( + bfd_link + ) + ch_bfd = ARIA2_BFD.out.db + ch_versions = 
ch_versions.mix(ARIA2_BFD.out.versions) + } else { + ARIA2_SMALL_BFD( + small_bfd_link + ) + ch_small_bfd = ARIA2_SMALL_BFD.out.db + ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) + } + + ARIA2_UNIREF30( + uniref30_rosettafold_all_atom_link + ) + ch_uniref30 = ARIA2_UNIREF30.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) + } + + emit: + bfd = ch_bfd + small_bfd = ch_small_bfd + uniref30 = ch_uniref30 + versions = ch_versions +} diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 28021c75..76704f99 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -1,37 +1,3 @@ -// Currently just based on the AF2 .nf workflow, requires modification. - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowRosettafold-All-Atom.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, - params.rosettafold_all_atom_db -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input file not specified!' } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -39,15 +5,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// MODULE: Loaded from modules/local/ // -include { INPUT_CHECK } from '../subworkflows/local/rfaa_input_check' // Doesn't exist, RFAA takes different inputs than AF2 -include { PREPARE_ROSETTAFOLD-ALL-ATOM_DBS } from '../subworkflows/local/prepare_rosettafold_all_atom_dbs' // Doesn't exist - -// -// MODULE: Local to the pipeline -// -include { RUN_ROSETTAFOLD-ALL-ATOM } from '../modules/local/run_rosettafold_all_atom' +include { RUN_ROSETTAFOLD_ALL_ATOM } from '../modules/local/run_rosettafold_all_atom' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -58,8 +18,15 @@ include { RUN_ROSETTAFOLD-ALL-ATOM } from '../modules/local/run_rosettafold // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,64 +34,75 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] +workflow ROSETTAFOLD_ALL_ATOM { -workflow ROSETTAFOLD-ALL-ATOM { + take: + ch_versions // channel: [ path(versions.yml) ] + ch_bfd // channel: path(bfd) + ch_small_bfd // channel: path(small_bfd) + ch_uniref30 // channel: path(uniref30) - ch_versions = Channel.empty() + main: + ch_multiqc_files = Channel.empty() // - // SUBWORKFLOW: Download databases and params for Rosettafold-All-Atom + // Create input channel from input file provided through params.input // - PREPARE_ROSETTAFOLD-ALL-ATOM_DBS ( ) - ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD-ALL-ATOM_DBS.out.versions) + Channel + .fromPath(params.input) + .set { ch_file } // - // MODULE: Pipeline reporting + // SUBWORKFLOW: Run Rosettafold_All_Atom standard mode // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') + RUN_ROSETTAFOLD_ALL_ATOM ( + ch_file, + ch_bfd, + ch_small_bfd, + ch_uniref30, ) + ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() + ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) // - // MODULE: MultiQC + // Collate and save software versions // - workflow_summary = WorkflowRosettafold-All-Atom.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowRosettafold-All-Atom.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + 
.set { ch_collated_versions } - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* From 7faa62a4dfa49102f08cf7b06a0879837cb4b8b6 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 21 Oct 2024 15:47:58 +1100 Subject: [PATCH 050/135] Added schema support for rosetta_fold_all_atom mode and .yaml or .yml input files --- nextflow_schema.json | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b7fa00f8..cb4fa549 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -18,7 +18,7 @@ "exists": true, "schema": "assets/schema_input.json", "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", + "pattern": "^\\S+\\.(csv|yaml|yml)$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -33,7 +33,7 @@ "type": "string", "default": "alphafold2", "description": "Specifies the mode in which the pipeline will be run", - "enum": ["alphafold2", "colabfold", "esmfold"], + "enum": ["alphafold2", "colabfold", "esmfold", "rosettafold_all_atom"], "fa_icon": "fas fa-cogs" }, "use_gpu": { @@ -195,10 +195,10 @@ } }, "rosettafold_all_atom_options": { - "title": "RoseTTAFold-all-atom options", + "title": "RoseTTAFold_all_atom options", "type": "object", "fa_icon": "fas fa-coins", - "description": "RoseTTAFold-all-atom options", + "description": "RoseTTAFold_all_atom options", "properties": { "rosettafold_all_atom_db": { "type": "string", @@ -536,29 +536,29 @@ } }, "rosettafold_all_atom_dbs_and_parameters_path_options": { - "title": "RosettaFold-All-Atom DBs and parameters path options", + "title": "RosettaFold_All_Atom DBs and parameters path options", "type": "object", - "description": "Parameters used to provide paths to the databases and parameters for RosettaFold-All-Atom.", + "description": "Parameters used to provide paths to the databases and parameters for RosettaFold_All_Atom.", "fa_icon": "fas fa-database", "properties": { "uniref30_rosettafold_all_atom_path": { "type": "string", - "description": "Path to the UniRef30 database for RosettaFold-All-Atom", + "description": "Path to the UniRef30 database for RosettaFold_All_Atom", "fa_icon": "fas fa-folder-open" }, "blast_path": { "type": "string", - "description": "Path to the BLAST database for RosettaFold-All-Atom", + "description": "Path to the BLAST database for RosettaFold_All_Atom", "fa_icon": "fas fa-folder-open" }, "pdb100_path": { "type": "string", - "description": "Path to the PDB100 database for RosettaFold-All-Atom", + "description": "Path to the PDB100 database for RosettaFold_All_Atom", "fa_icon": "fas fa-folder-open" }, "RFAA_paper_weights_path": { "type": "string", - "description": "Path 
to the weights file used in the RFAA paper for RosettaFold-All-Atom", + "description": "Path to the weights file used in the RFAA paper for RosettaFold_All_Atom", "fa_icon": "fas fa-folder-open" } } @@ -703,6 +703,9 @@ { "$ref": "#/definitions/esmfold_options" }, + { + "$ref": "#/definitions/rosettafold_all_atom_options" + }, { "$ref": "#/definitions/process_skipping_options" }, @@ -730,6 +733,9 @@ { "$ref": "#/definitions/esmfold_parameters_path_options" }, + { + "$ref": "#/definitions/rosettafold_all_atom_dbs_and_parameters_path_options" + }, { "$ref": "#/definitions/generic_options" } From 375e33068031811d0f340f3995f84cef96823e3e Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 21 Oct 2024 16:17:39 +1100 Subject: [PATCH 051/135] modified: assets/schema_input.json --- assets/schema_input.json | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 29bca5dd..c261ae58 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,5 +1,5 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/assets/schema_input.json", "title": "nf-core/proteinfold pipeline - params.input schema", "description": "Schema for the file provided with params.input", @@ -7,28 +7,18 @@ "items": { "type": "object", "properties": { - "sample": { + "sequence": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sequence name must be provided and cannot contain spaces", + "meta": ["id"] }, - "fastq_1": { + "fasta": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" - }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot 
contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fa(sta)?$", + "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" } }, "required": ["sequence", "fasta"] From 6fdb11175e624424ac88c5e7c40f80ccaba5267e Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 21 Oct 2024 16:45:05 +1100 Subject: [PATCH 052/135] Updating input to work with .yaml https://github.com/Australian-Structural-Biology-Computing/proteinfold/issues/19 modified: assets/schema_input.json modified: modules/local/run_rosettafold_all_atom.nf modified: nextflow_schema.json --- assets/schema_input.json | 4 ++-- modules/local/run_rosettafold_all_atom.nf | 14 +++++++------- nextflow_schema.json | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index c261ae58..49e61a92 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,8 +17,8 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.fa(sta)?$", - "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" + "pattern": "^\\S+\\.(fa(sta)?|yaml|yml)$", + "errorMessage": "Fasta or yaml file must be provided, cannot contain spaces and must have extension '.fa', '.fasta', '.yaml' or '.yml'" } }, "required": ["sequence", "fasta"] diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 9e288d33..61880dda 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -13,12 +13,12 @@ process RUN_ROSETTAFOLD_ALL_ATOM { container "RoseTTAFold_All_Atom.sif" input: - tuple val(meta), path(file) + tuple val(meta), path(fasta) output: - path 
("${file.baseName}*") + path ("${fasta.baseName}*") path "*_mqc.tsv", emit: multiqc - path "versions.yaml", emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -30,9 +30,9 @@ process RUN_ROSETTAFOLD_ALL_ATOM { --env bfd_path="${params.bfd_path}" \ --env uniref30_path="${params.uniref30_variable}" \ --env pdb100="${params.pdb100_path}" \ - RoseTTAFold_All_Atom.sif "$file" + RoseTTAFold_All_Atom.sif "$fasta" - cat <<-END_VERSIONS > versions.yaml + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') END_VERSIONS @@ -40,8 +40,8 @@ process RUN_ROSETTAFOLD_ALL_ATOM { stub: """ - touch ./"${file.baseName}".rosettafold_all_atom.pdb - touch ./"${file.baseName}"_mqc.tsv + touch ./"${fasta.baseName}".rosettafold_all_atom.pdb + touch ./"${fasta.baseName}"_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow_schema.json b/nextflow_schema.json index cb4fa549..17467d3f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -18,7 +18,7 @@ "exists": true, "schema": "assets/schema_input.json", "mimetype": "text/csv", - "pattern": "^\\S+\\.(csv|yaml|yml)$", + "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" From 3a9d7f2f40077a1fc6039d52970785bf3de7b0ea Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 12:30:31 +1100 Subject: [PATCH 053/135] Merging --- conf/base.config | 4 - conf/dbs.config | 31 +-- modules/local/run_alphafold2.nf | 13 - modules/local/run_alphafold2_msa.nf | 12 - nextflow.config | 263 ++++++++------------ nextflow.config.1 | 373 ++++++++++++++++++++++++++++ 6 files changed, 479 insertions(+), 217 deletions(-) create mode 100644 nextflow.config.1 diff --git a/conf/base.config b/conf/base.config index 86cc7b94..533fa056 100644 --- a/conf/base.config +++ b/conf/base.config @@ -19,13 +19,9 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } -<<<<<<< HEAD //executor = 'pbspro' - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } -======= errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } ->>>>>>> master maxRetries = 1 maxErrors = '-1' diff --git a/conf/dbs.config b/conf/dbs.config index c1a1e718..8368946c 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -9,7 +9,6 @@ params { // AlphaFold2 links -<<<<<<< HEAD bfd = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' small_bfd = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' alphafold2_params = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar' @@ -88,32 +87,6 @@ params { template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/" //obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" obsolete_pdbs_path = "${params.alphafold2_db}/${mmcif_obsolete_name}" -======= - bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' - small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' - alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' - mgnify_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz' - pdb70_link = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' - pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //Other sources available: 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ - pdb_obsolete_link = 'https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat' - uniref30_alphafold2_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz' - uniref90_link = 
'https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' - pdb_seqres_link = 'https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' - uniprot_sprot_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' - uniprot_trembl_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - - // Alphafold paths - bfd_path = "${params.alphafold2_db}/bfd/*" - small_bfd_path = "${params.alphafold2_db}/small_bfd/*" - alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" - mgnify_path = "${params.alphafold2_db}/mgnify/*" - pdb70_path = "${params.alphafold2_db}/pdb70/**" - pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" - uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" - uniref90_path = "${params.alphafold2_db}/uniref90/*" - pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" ->>>>>>> master // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' @@ -129,7 +102,6 @@ params { "alphafold2_ptm" : "alphafold_params_2021-07-14" ] -<<<<<<< HEAD // RoseTTAFold links uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' @@ -141,7 +113,7 @@ params { bfd_variable = "${params.rosettafold_all_atom_db}/bfd/" RFAA_paper_weights_variable = "" blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" -======= + // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' @@ -149,5 +121,4 @@ params { // Esmfold paths esmfold_params_path = "${params.esmfold_db}/*" ->>>>>>> master } diff --git a/modules/local/run_alphafold2.nf 
b/modules/local/run_alphafold2.nf index a8f1bf38..481a717e 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -37,13 +37,8 @@ process RUN_ALPHAFOLD2 { script: def args = task.ext.args ?: '' -<<<<<<< HEAD def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" -======= - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" ->>>>>>> master if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } @@ -65,18 +60,10 @@ process RUN_ALPHAFOLD2 { --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ -<<<<<<< HEAD --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ -======= - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ ->>>>>>> master - --random_seed=53343 \ --use_gpu_relax \ $args diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index db2af2c3..7878d9d1 100644 --- a/modules/local/run_alphafold2_msa.nf +++ 
b/modules/local/run_alphafold2_msa.nf @@ -37,13 +37,8 @@ process RUN_ALPHAFOLD2_MSA { script: def args = task.ext.args ?: '' -<<<<<<< HEAD def db_preset = db_preset ? "full_dbs --bfd_database_path=${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=${params.alphafold2_db}/uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=${params.alphafold2_db}/small_bfd/bfd-first_non_consensus_sequences.fasta" -======= - def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : - "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" ->>>>>>> master if (alphafold2_model_preset == 'multimer') { alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " } @@ -62,17 +57,10 @@ process RUN_ALPHAFOLD2_MSA { --db_preset=${db_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ -<<<<<<< HEAD --uniref90_database_path=${params.alphafold2_db}/uniref90/uniref90.fasta \ --mgnify_database_path=${params.alphafold2_db}/mgnify/mgy_clusters_2022_05.fa \ --template_mmcif_dir=${params.alphafold2_db}/pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=${params.alphafold2_db}/pdb_mmcif/obsolete.dat \ -======= - --uniref90_database_path=./uniref90/uniref90.fasta \ - --mgnify_database_path=./mgnify/mgy_clusters_2022_05.fa \ - --template_mmcif_dir=./pdb_mmcif/mmcif_files \ - --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ ->>>>>>> master $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl diff --git a/nextflow.config b/nextflow.config index f91e93a0..4b3c2977 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,62 +20,20 @@ params { full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer 
(default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null - - // Database prefixes - bfd_prefix = null - smallbfd_prefix = null - mgnify_prefix = null - pdb70_prefix = null - pdb_mmcif_prefix = null - uniclust30_prefix = null - uniref90_prefix = null - pdb_seq_prefix = null - uniprot_prefix = null - alphafold_params_prefix = null - mmcif_path = null - mmcif_obsolete = null - uniclust30_db = null - bfd_first_non_consensus_sequences = null - uniprot_fasta = null - pdb_seqres_txt = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null - uniref90_fasta = null - mgy_clusters_fasta = null - uniclust30_prefix = null - - bfd_name = null - smallbfd_name = null - mgnify_name = null - pdb70_name = null - pdb_mmcif_name = null - uniclust30_name = null - uniref90_name = null - pdb_seqres_name = null - uniprot_name = null - alphafold_params_name = null - mmcif_files_name = null - mmcif_obsolete_name = null - uniclust30_db_name = null - bfd_first_non_consensus_sequences_name = null - uniprot_fasta_name = null - pdb_seqres_txt_name = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null - uniref90_fasta_name = null - mgy_clusters_fasta_name = null // Alphafold2 links - bfd = null - small_bfd = null - alphafold2_params = null - mgnify = null - pdb70 = null - pdb_mmcif = null - pdb_obsolete = null - uniclust30 = null - uniref90 = null - pdb_seqres = null - uniprot_sprot = null - uniprot_trembl = null + bfd_link = null + small_bfd_link = null + alphafold2_params_link = null + mgnify_link = null + pdb70_link = null + pdb_mmcif_link = null + pdb_obsolete_link = null + uniref30_alphafold2_link = null + uniref90_link = null + pdb_seqres_link = null + uniprot_sprot_link = null + uniprot_trembl_link = null // Alphafold2 paths bfd_path = null @@ -130,6 +88,7 @@ params { // Process skipping options skip_multiqc = false + skip_visualisation = false // MultiQC options multiqc_config = null @@ -139,62 +98,36 @@ params { 
multiqc_methods_description = null // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = '' - validationShowHiddenParams = false - validate_params = true + validate_params = true } -spack.enabled = true - // Load base.config by default for all pipelines -includeConfig 'conf/katana.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: 
${params.custom_config_base}/nfcore_custom.config") -} +includeConfig 'conf/base.config' -// Load nf-core/proteinfold custom profiles from different institutions. -try { - includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") -} profiles { debug { dumpHashes = true @@ -209,7 +142,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda', 'defaults'] + conda.channels = ['conda-forge', 'bioconda'] apptainer.enabled = false } mamba { @@ -224,7 +157,6 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true if (params.use_gpu) { docker.runOptions = '--gpus all' } else { @@ -322,18 +254,20 @@ profiles { test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } } -// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile -// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled -// Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" -// Nextflow plugins -plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} +// Load nf-core/proteinfold custom profiles from different institutions. +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. @@ -346,8 +280,15 @@ env { JULIA_DEPOT_PATH = "/usr/local/share/julia" } -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Set bash options +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" // Disable process selector warnings by default. Use debug profile to enable warnings. 
nextflow.enable.configProcessNamesValidation = false @@ -355,19 +296,19 @@ nextflow.enable.configProcessNamesValidation = false def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -376,57 +317,63 @@ manifest { homePage = 'https://github.com/nf-core/proteinfold' description = """Protein 3D structure prediction pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.1.1' + nextflowVersion = '!>=24.04.2' + version = '1.2.0dev' doi = '10.5281/zenodo.7629996' } +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + defaultIgnoreParams = ["genomes"] + help { + enabled = true + command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m 
${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText + } +} + // Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' // Load modules config for pipeline specific modes -if (params.mode == 'alphafold2') { +if (params.mode.toLowerCase().split(",").contains("alphafold2")) { includeConfig 'conf/modules_alphafold2.config' -} else if (params.mode == 'colabfold') { +} +if (params.mode.toLowerCase().split(",").contains("colabfold")) { includeConfig 'conf/modules_colabfold.config' -} else if (params.mode == 'esmfold') { +} +if (params.mode.toLowerCase().split(",").contains("esmfold")) { includeConfig 'conf/modules_esmfold.config' -} else if (params.mode == 'rosettafold_all_atom') { +} +if (params.mode.toLowerCase().split(",").contains("rosettafold_all_atom")) { includeConfig 'conf/modules_rosettafold_all_atom.config' } // Load links to DBs and parameters includeConfig 'conf/dbs.config' - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/nextflow.config.1 b/nextflow.config.1 new file mode 100644 index 00000000..d8fc2623 --- /dev/null +++ b/nextflow.config.1 @@ -0,0 +1,373 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs +params { + + // Input options + input = null + mode = 'alphafold2' // {alphafold2, colabfold, esmfold} + use_gpu = false + + // Alphafold2 parameters + alphafold2_mode = "standard" + max_template_date = "2020-05-14" + full_dbs = false // true full_dbs, false reduced_dbs + alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} + alphafold2_db = null + + // Alphafold2 links + bfd_link = null + small_bfd_link = null + alphafold2_params_link = null + mgnify_link = null + pdb70_link = null + pdb_mmcif_link = null + pdb_obsolete_link = null + uniref30_alphafold2_link = null + uniref90_link = null + pdb_seqres_link = null + uniprot_sprot_link = null + uniprot_trembl_link = null + + // Alphafold2 paths + bfd_path = null + small_bfd_path = null + alphafold2_params_path = null + mgnify_path 
= null + pdb70_path = null + pdb_mmcif_path = null + uniref30_alphafold2_path = null + uniref90_path = null + pdb_seqres_path = null + uniprot_path = null + + // Colabfold parameters + colabfold_server = "webserver" + colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} + num_recycles_colabfold = 3 + use_amber = true + colabfold_db = null + db_load_mode = 0 + host_url = null + use_templates = true + create_colabfold_index = false + + // Colabfold links + colabfold_db_link = null + uniref30_colabfold_link = null + + // Colabfold paths + colabfold_db_path = null + uniref30_colabfold_path = null + + // Esmfold parameters + esmfold_db = null + esmfold_model_preset = "monomer" + num_recycles_esmfold = 4 + + // Esmfold links + esmfold_3B_v1 = null + esm2_t36_3B_UR50D = null + esm2_t36_3B_UR50D_contact_regression = null + + // Esmfold paths + esmfold_params_path = null + + // Foldseek params + foldseek_search = null + foldseek_easysearch_arg = null + + // Process skipping options + skip_multiqc = false + skip_visualisation = false + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // Config options + config_profile_name = null + config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + 
validate_params = true + +} + +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + if (params.use_gpu) { + docker.runOptions = '--gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g)' + } + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + arm { + if (params.use_gpu) { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + if (params.use_gpu) { singularity.runOptions = '--nv' } + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + 
charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { includeConfig 'conf/test.config' } + test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } + test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } + test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } + test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } + test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } + test_esmfold { includeConfig 'conf/test_esmfold.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } + test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } + test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } + test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } + test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } + test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } + test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } + test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } +} + +// Load nf-core custom profiles from different Institutions 
+includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" + +// Load nf-core/proteinfold custom profiles from different institutions. +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Set bash options +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" +} +report { + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" +} + +manifest { + name = 'nf-core/proteinfold' + author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" + homePage = 'https://github.com/nf-core/proteinfold' + description = """Protein 3D structure prediction pipeline""" + mainScript = 'main.nf' + nextflowVersion = '!>=24.04.2' + version = '1.2.0dev' + doi = '10.5281/zenodo.7629996' +} + +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + defaultIgnoreParams = ["genomes"] + help { + enabled = true + command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText + } +} + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Load modules config for pipeline specific modes +if (params.mode.toLowerCase().split(",").contains("alphafold2")) { + includeConfig 'conf/modules_alphafold2.config' +} +if (params.mode.toLowerCase().split(",").contains("colabfold")) { + includeConfig 'conf/modules_colabfold.config' +} +if (params.mode.toLowerCase().split(",").contains("esmfold")) { + includeConfig 'conf/modules_esmfold.config' +} + +// Load links to DBs and parameters +includeConfig 'conf/dbs.config' From 2136354ef9d40399d969a04c53efd83a10c61fa1 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 12:52:21 +1100 Subject: [PATCH 054/135] Updated naming scheme with merged changes --- conf/dbs.config | 49 +++---- nextflow.config | 44 +++++- nextflow.config.2 | 339 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 401 insertions(+), 31 deletions(-) create mode 100644 nextflow.config.2 diff --git a/conf/dbs.config b/conf/dbs.config index 8368946c..b328e8f3 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -16,7 +16,7 @@ params { pdb70 = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' pdb_mmcif = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ pdb_obsolete = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' - uniclust30 = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' + uniref30 = 
'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniref30_2018_08_hhsuite.tar.gz' uniref90 = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' pdb_seqres = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' @@ -27,7 +27,7 @@ params { mgnify_name = params.mgnify_prefix ?: 'mgnify' pdb70_name = params.pdb70_prefix ?: 'pdb70' pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' - uniclust30_name = params.uniclust30_prefix ?: 'uniclust30' + uniref30_name = params.uniref30_prefix ?: 'uniref30' uniref90_name = params.uniref90_prefix ?: 'uniref90' pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' uniprot_name = params.uniprot_prefix ?: 'uniprot' @@ -35,7 +35,7 @@ params { mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' - uniclust30_db_name = params.uniclust30_db ?: 'uniclust30_2018_08' + uniref30_db_name = params.uniref30_db ?: 'uniref30_2018_08' bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' @@ -45,47 +45,31 @@ params { // Alphafold paths - bfd_path = "${params.alphafold2_db}/${bfd_name}/*" - small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" - //alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" - //alphafold2_params_path = "${params.alphafold2_db}/params/*" - alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" - mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" - pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**" - pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**" - //uniclust30_path = 
"${params.alphafold2_db}/uniclust30/**" - //uniclust30_path = "/srv/scratch/sbf/uniclust30/**" - uniclust30_path = "${params.alphafold2_db}/${uniclust30_name}/**" - uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*" - pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*" - uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*" + bfd_path = "${params.alphafold2_db}/${bfd_name}/*" + small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" + alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" + mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" + pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**" + pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**" + uniref30_alphafold2_path = "${params.alphafold2_db}/${uniref30_name}/**" + uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*" + pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*" + uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*" // Alphafold variables - //bfd_variable = "${params.alphafold2_db}/bfd/" bfd_dir_path = "${params.alphafold2_db}/${bfd_name}/" - //small_bfd_variable = "${params.alphafold2_db}/smallbfd/" small_bfd_dir_path = "${params.alphafold2_db}/${smallbfd_name}/" - //mgnify_variable = "${params.alphafold2_db}/mgnify/" mgnify_dir_path = "${params.alphafold2_db}/${mgnify_name}/" - //pdb70_variable = "${params.alphafold2_db}/pdb70/" pdb70_dir_path = "${params.alphafold2_db}/${pdb70_name}/" - //pdb_mmcif_variable = "${params.alphafold2_db}/pdb_mmcif/" pdb_mmcif_dir_path = "${params.alphafold2_db}/${pdb_mmcif_name}/" - //uniclust30_variable = "${params.alphafold2_db}/uniclust30/" - //uniclust30_variable = "/srv/scratch/sbf/uniclust30/" - uniclust30_dir_path = "${params.alphafold2_db}/${uniclust30_name}/" - //uniref90_variable = "${params.alphafold2_db}/uniref90/" + uniref30_dir_path = "${params.alphafold2_db}/${uniref30_name}/" uniref90_dir_path = "${params.alphafold2_db}/${uniref90_name}/" - 
//pdb_seqres_variable = "${params.alphafold2_db}/pdb_seqres/" pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" - //uniprot_variable = "${params.alphafold2_db}/uniprot/" uniprot_dir_path = "${params.alphafold2_db}/${uniprot_name}/" // Alphafold MSA Variables mgnify_database_path = "${params.alphafold2_db}/${mgnify_name}/" - //template_mmcif_dir = "${params.alphafold2_db}/pdb_mmcif/mmcif_files/" template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/" - //obsolete_pdbs_path = "${params.alphafold2_db}/pdb_mmcif/obsolete.dat" obsolete_pdbs_path = "${params.alphafold2_db}/${mmcif_obsolete_name}" // Colabfold links @@ -121,4 +105,9 @@ params { // Esmfold paths esmfold_params_path = "${params.esmfold_db}/*" + + // Foldseek databases paths + foldseek_db = null + foldseek_db_path = null + } diff --git a/nextflow.config b/nextflow.config index 4b3c2977..9add031b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,48 @@ params { full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null + + // Database prefixes + bfd_prefix = null + smallbfd_prefix = null + mgnify_prefix = null + pdb70_prefix = null + pdb_mmcif_prefix = null + uniref30_prefix = null + uniref90_prefix = null + pdb_seq_prefix = null + uniprot_prefix = null + alphafold_params_prefix = null + mmcif_path = null + mmcif_obsolete = null + uniref30_db = null + bfd_first_non_consensus_sequences = null + uniprot_fasta = null + pdb_seqres_txt = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null + uniref90_fasta = null + mgy_clusters_fasta = null + uniref30_prefix = null + + bfd_name = null + smallbfd_name = null + mgnify_name = null + pdb70_name = null + pdb_mmcif_name = null + uniref30_name = null + uniref90_name = null + pdb_seqres_name = null + uniprot_name = null + alphafold_params_name = null + mmcif_files_name = null + 
mmcif_obsolete_name = null + uniref30_db_name = null + bfd_first_non_consensus_sequences_name = null + uniprot_fasta_name = null + pdb_seqres_txt_name = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null + uniref90_fasta_name = null + mgy_clusters_fasta_name = null // Alphafold2 links bfd_link = null @@ -126,7 +168,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/base.config' +includeConfig 'conf/katana.config' profiles { debug { diff --git a/nextflow.config.2 b/nextflow.config.2 new file mode 100644 index 00000000..69cc7ad2 --- /dev/null +++ b/nextflow.config.2 @@ -0,0 +1,339 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/proteinfold Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs +params { + + // Input options + input = null + mode = 'alphafold2' // {alphafold2, colabfold} + use_gpu = false + + // Alphafold2 parameters + alphafold2_mode = "standard" + max_template_date = "2020-05-14" + full_dbs = false // true full_dbs, false reduced_dbs + alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} + alphafold2_db = null + + // Database prefixes + bfd_prefix = null + smallbfd_prefix = null + mgnify_prefix = null + pdb70_prefix = null + pdb_mmcif_prefix = null + uniclust30_prefix = null + uniref90_prefix = null + pdb_seq_prefix = null + uniprot_prefix = null + alphafold_params_prefix = null + mmcif_path = null + mmcif_obsolete = null + uniclust30_db = null + bfd_first_non_consensus_sequences = null + uniprot_fasta = null + pdb_seqres_txt = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null + uniref90_fasta = null + 
mgy_clusters_fasta = null + uniclust30_prefix = null + + bfd_name = null + smallbfd_name = null + mgnify_name = null + pdb70_name = null + pdb_mmcif_name = null + uniclust30_name = null + uniref90_name = null + pdb_seqres_name = null + uniprot_name = null + alphafold_params_name = null + mmcif_files_name = null + mmcif_obsolete_name = null + uniclust30_db_name = null + bfd_first_non_consensus_sequences_name = null + uniprot_fasta_name = null + pdb_seqres_txt_name = null + bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null + uniref90_fasta_name = null + mgy_clusters_fasta_name = null + + // Alphafold2 links + bfd = null + small_bfd = null + alphafold2_params = null + mgnify = null + pdb70 = null + pdb_mmcif = null + pdb_obsolete = null + uniclust30 = null + uniref90 = null + pdb_seqres = null + uniprot_sprot = null + uniprot_trembl = null + + // Alphafold2 paths + bfd_path = null + small_bfd_path = null + alphafold2_params_path = null + mgnify_path = null + pdb70_path = null + pdb_mmcif_path = null + uniclust30_path = null + uniref90_path = null + pdb_seqres_path = null + uniprot_path = null + + // Colabfold parameters + colabfold_server = "webserver" + colabfold_model_preset = "AlphaFold2-ptm" // {AlphaFold2-ptm,AlphaFold2-multimer-v1,AlphaFold2-multimer-v2} + num_recycle = 3 + use_amber = true + colabfold_db = null + db_load_mode = 0 + host_url = null + use_templates = true + create_colabfold_index = false + + // Colabfold links + colabfold_db_link = null + uniref30 = null + + // Colabfold paths + colabfold_db_path = null + uniref30_path = null + + // MultiQC options + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' + multiqc_methods_description = null + + // Boilerplate options + outdir = null + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + 
version = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes' + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} + +spack.enabled = true + +// Load base.config by default for all pipelines +includeConfig 'conf/katana.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +// Load nf-core/proteinfold custom profiles from different institutions. 
+try { + includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") +} + + +profiles { + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + if (params.use_gpu) { docker.runOptions = '--gpus all' } + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + arm { + if (params.use_gpu) { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' + } else { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + if (params.use_gpu) { singularity.runOptions = '--nv' } + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } + test { includeConfig 'conf/test.config' } + test_alphafold2_split { 
includeConfig 'conf/test_alphafold_split.config' } + test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } + test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } + test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } + test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } + test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } + test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } + test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
+ +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +timeline { + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" +} +report { + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" +} +trace { + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" +} + +manifest { + name = 'nf-core/proteinfold' + author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" + homePage = 'https://github.com/nf-core/proteinfold' + description = """Protein 3D structure prediction pipeline""" + mainScript = 'main.nf' + nextflowVersion = '!>=22.10.1' + version = '1.0.0' + doi = '10.5281/zenodo.7629996' +} + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Load modules config for pipeline specific modes +if (params.mode == 'alphafold2') { + includeConfig 'conf/modules_alphafold2.config' +} else if (params.mode == 'colabfold') { + includeConfig 'conf/modules_colabfold.config' +} + +// Load links to DBs and parameters +includeConfig 'conf/dbs.config' + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" + return obj + } + } +} From b0f13c33e2e99442587cbb3db605f6b64671fa66 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 13:34:58 +1100 Subject: [PATCH 055/135] modified: modules/local/run_alphafold2.nf modified: modules/local/run_alphafold2_pred.nf --- modules/local/run_alphafold2.nf | 8 -------- modules/local/run_alphafold2_pred.nf | 1 - 2 files changed, 9 deletions(-) diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index 481a717e..b3221263 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -46,13 +46,6 @@ process RUN_ALPHAFOLD2 { alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - RUNTIME_TMP=\$(mktemp -d) - nvcc --version 2>&1 | tee /home/z3545907/nvcc.txt - nvidia-smi 2>&1 | tee /home/z3545907/nvidia-smi.txt - if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] - cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} - then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt - fi if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ @@ -77,7 +70,6 @@ process RUN_ALPHAFOLD2 { echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv cd .. 
- rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index 3f34c95f..3d21c76d 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -46,7 +46,6 @@ process RUN_ALPHAFOLD2_PRED { --model_preset=${alphafold2_model_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ - --random_seed=53343 \ --msa_path=${msa} \ --use_gpu_relax \ $args From 903b6a27069133244f078ebf1e56058a5e61212a Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 13:43:43 +1100 Subject: [PATCH 056/135] For https://github.com/nf-core/proteinfold/issues/197 --- nextflow.config.1 | 373 ---------------------------------------------- nextflow.config.2 | 339 ----------------------------------------- 2 files changed, 712 deletions(-) delete mode 100644 nextflow.config.1 delete mode 100644 nextflow.config.2 diff --git a/nextflow.config.1 b/nextflow.config.1 deleted file mode 100644 index d8fc2623..00000000 --- a/nextflow.config.1 +++ /dev/null @@ -1,373 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments ----------------------------------------------------------------------------------------- -*/ - -// Global default params, used in configs -params { - - // Input options - input = null - mode = 'alphafold2' // {alphafold2, colabfold, esmfold} - use_gpu = false - - // Alphafold2 parameters - alphafold2_mode = "standard" - max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs - alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} - alphafold2_db = null - - // Alphafold2 links - bfd_link = null - small_bfd_link = null - 
alphafold2_params_link = null - mgnify_link = null - pdb70_link = null - pdb_mmcif_link = null - pdb_obsolete_link = null - uniref30_alphafold2_link = null - uniref90_link = null - pdb_seqres_link = null - uniprot_sprot_link = null - uniprot_trembl_link = null - - // Alphafold2 paths - bfd_path = null - small_bfd_path = null - alphafold2_params_path = null - mgnify_path = null - pdb70_path = null - pdb_mmcif_path = null - uniref30_alphafold2_path = null - uniref90_path = null - pdb_seqres_path = null - uniprot_path = null - - // Colabfold parameters - colabfold_server = "webserver" - colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} - num_recycles_colabfold = 3 - use_amber = true - colabfold_db = null - db_load_mode = 0 - host_url = null - use_templates = true - create_colabfold_index = false - - // Colabfold links - colabfold_db_link = null - uniref30_colabfold_link = null - - // Colabfold paths - colabfold_db_path = null - uniref30_colabfold_path = null - - // Esmfold parameters - esmfold_db = null - esmfold_model_preset = "monomer" - num_recycles_esmfold = 4 - - // Esmfold links - esmfold_3B_v1 = null - esm2_t36_3B_UR50D = null - esm2_t36_3B_UR50D_contact_regression = null - - // Esmfold paths - esmfold_params_path = null - - // Foldseek params - foldseek_search = null - foldseek_easysearch_arg = null - - // Process skipping options - skip_multiqc = false - skip_visualisation = false - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - help_full = false - show_hidden = false - version = false - pipelines_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/' - - // Config options - config_profile_name = null - config_profile_description = null - - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null - - // Schema validation default options - validate_params = true - -} - -// Load base.config by default for all pipelines -includeConfig 'conf/base.config' - -profiles { - debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false - nextflow.enable.configProcessNamesValidation = true - } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false - } - mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - docker { - docker.enabled = true - if (params.use_gpu) { - docker.runOptions = '--gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g)' - } - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - arm { - if (params.use_gpu) { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - if (params.use_gpu) { singularity.runOptions = '--nv' } - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - podman { - podman.enabled = true - 
conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false - } - apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - wave { - apptainer.ociAutoPull = true - singularity.ociAutoPull = true - wave.enabled = true - wave.freeze = true - wave.strategy = 'conda,container' - } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - } - test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } - test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } - test_esmfold { includeConfig 'conf/test_esmfold.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } - test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } - test_full_colabfold_webserver { 
includeConfig 'conf/test_full_colabfold_webserver.config' } - test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } - test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } - test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } -} - -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - -// Load nf-core/proteinfold custom profiles from different institutions. -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" - -// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile -// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled -// Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' -charliecloud.registry = 'quay.io' - -// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
- -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} - -// Set bash options -process.shell = """\ -bash - -set -e # Exit if a tool returns a non-zero status/exit code -set -u # Treat unset variables and parameters as an error -set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute -set -C # No clobber - prevent output redirection from overwriting files. -""" - -// Disable process selector warnings by default. Use debug profile to enable warnings. -nextflow.enable.configProcessNamesValidation = false - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" -} - -manifest { - name = 'nf-core/proteinfold' - author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" - homePage = 'https://github.com/nf-core/proteinfold' - description = """Protein 3D structure prediction pipeline""" - mainScript = 'main.nf' - nextflowVersion = '!>=24.04.2' - version = '1.2.0dev' - doi = '10.5281/zenodo.7629996' -} - -// Nextflow plugins -plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} - -validation { - defaultIgnoreParams = ["genomes"] - help { - enabled = true - command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ 
--\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m ${manifest.name} ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/${manifest.name}/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } -} - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Load modules config for pipeline specific modes -if (params.mode.toLowerCase().split(",").contains("alphafold2")) { - includeConfig 'conf/modules_alphafold2.config' -} -if (params.mode.toLowerCase().split(",").contains("colabfold")) { - includeConfig 'conf/modules_colabfold.config' -} -if (params.mode.toLowerCase().split(",").contains("esmfold")) { - includeConfig 'conf/modules_esmfold.config' -} - -// Load links to DBs and parameters -includeConfig 'conf/dbs.config' diff --git a/nextflow.config.2 b/nextflow.config.2 deleted file mode 100644 index 69cc7ad2..00000000 --- a/nextflow.config.2 +++ /dev/null @@ -1,339 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments 
----------------------------------------------------------------------------------------- -*/ - -// Global default params, used in configs -params { - - // Input options - input = null - mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = false - - // Alphafold2 parameters - alphafold2_mode = "standard" - max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs - alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} - alphafold2_db = null - - // Database prefixes - bfd_prefix = null - smallbfd_prefix = null - mgnify_prefix = null - pdb70_prefix = null - pdb_mmcif_prefix = null - uniclust30_prefix = null - uniref90_prefix = null - pdb_seq_prefix = null - uniprot_prefix = null - alphafold_params_prefix = null - mmcif_path = null - mmcif_obsolete = null - uniclust30_db = null - bfd_first_non_consensus_sequences = null - uniprot_fasta = null - pdb_seqres_txt = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null - uniref90_fasta = null - mgy_clusters_fasta = null - uniclust30_prefix = null - - bfd_name = null - smallbfd_name = null - mgnify_name = null - pdb70_name = null - pdb_mmcif_name = null - uniclust30_name = null - uniref90_name = null - pdb_seqres_name = null - uniprot_name = null - alphafold_params_name = null - mmcif_files_name = null - mmcif_obsolete_name = null - uniclust30_db_name = null - bfd_first_non_consensus_sequences_name = null - uniprot_fasta_name = null - pdb_seqres_txt_name = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null - uniref90_fasta_name = null - mgy_clusters_fasta_name = null - - // Alphafold2 links - bfd = null - small_bfd = null - alphafold2_params = null - mgnify = null - pdb70 = null - pdb_mmcif = null - pdb_obsolete = null - uniclust30 = null - uniref90 = null - pdb_seqres = null - uniprot_sprot = null - uniprot_trembl = null - - // Alphafold2 paths - bfd_path = null - small_bfd_path = null - 
alphafold2_params_path = null - mgnify_path = null - pdb70_path = null - pdb_mmcif_path = null - uniclust30_path = null - uniref90_path = null - pdb_seqres_path = null - uniprot_path = null - - // Colabfold parameters - colabfold_server = "webserver" - colabfold_model_preset = "AlphaFold2-ptm" // {AlphaFold2-ptm,AlphaFold2-multimer-v1,AlphaFold2-multimer-v2} - num_recycle = 3 - use_amber = true - colabfold_db = null - db_load_mode = 0 - host_url = null - use_templates = true - create_colabfold_index = false - - // Colabfold links - colabfold_db_link = null - uniref30 = null - - // Colabfold paths - colabfold_db_path = null - uniref30_path = null - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - -} - -spack.enabled = true - -// Load base.config by default for all pipelines -includeConfig 'conf/katana.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - 
-// Load nf-core/proteinfold custom profiles from different institutions. -try { - includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") -} - - -profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - docker { - docker.enabled = true - docker.userEmulation = true - if (params.use_gpu) { docker.runOptions = '--gpus all' } - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - arm { - if (params.use_gpu) { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - if (params.use_gpu) { singularity.runOptions = '--nv' } - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } - gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB - } - 
test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } - test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } - test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } - test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } -} - -// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
- -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" -} - -manifest { - name = 'nf-core/proteinfold' - author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" - homePage = 'https://github.com/nf-core/proteinfold' - description = """Protein 3D structure prediction pipeline""" - mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0.0' - doi = '10.5281/zenodo.7629996' -} - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Load modules config for pipeline specific modes -if (params.mode == 'alphafold2') { - includeConfig 'conf/modules_alphafold2.config' -} else if (params.mode == 'colabfold') { - includeConfig 'conf/modules_colabfold.config' -} - -// Load links to DBs and parameters -includeConfig 'conf/dbs.config' - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} From 9aa50543fe5297285f1595168bb8aa65cdcc0c45 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 13:47:13 +1100 Subject: [PATCH 057/135] Cleaned up files --- nextflow.config.1 | 373 ---------------------------------------------- nextflow.config.2 | 339 ----------------------------------------- 2 files changed, 712 deletions(-) delete mode 100644 nextflow.config.1 delete mode 100644 nextflow.config.2 diff --git a/nextflow.config.1 b/nextflow.config.1 deleted file mode 100644 index d8fc2623..00000000 --- a/nextflow.config.1 +++ /dev/null @@ -1,373 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments ----------------------------------------------------------------------------------------- -*/ - -// Global default params, used in configs -params { - - // Input options - input = null - mode = 'alphafold2' // {alphafold2, colabfold, esmfold} - use_gpu = false - - // Alphafold2 parameters - alphafold2_mode = "standard" - max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs - alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} - alphafold2_db = null - - // Alphafold2 links - 
bfd_link = null - small_bfd_link = null - alphafold2_params_link = null - mgnify_link = null - pdb70_link = null - pdb_mmcif_link = null - pdb_obsolete_link = null - uniref30_alphafold2_link = null - uniref90_link = null - pdb_seqres_link = null - uniprot_sprot_link = null - uniprot_trembl_link = null - - // Alphafold2 paths - bfd_path = null - small_bfd_path = null - alphafold2_params_path = null - mgnify_path = null - pdb70_path = null - pdb_mmcif_path = null - uniref30_alphafold2_path = null - uniref90_path = null - pdb_seqres_path = null - uniprot_path = null - - // Colabfold parameters - colabfold_server = "webserver" - colabfold_model_preset = "alphafold2_ptm" // {'auto', 'alphafold2', 'alphafold2_ptm', 'alphafold2_multimer_v1', 'alphafold2_multimer_v2', 'alphafold2_multimer_v3'} - num_recycles_colabfold = 3 - use_amber = true - colabfold_db = null - db_load_mode = 0 - host_url = null - use_templates = true - create_colabfold_index = false - - // Colabfold links - colabfold_db_link = null - uniref30_colabfold_link = null - - // Colabfold paths - colabfold_db_path = null - uniref30_colabfold_path = null - - // Esmfold parameters - esmfold_db = null - esmfold_model_preset = "monomer" - num_recycles_esmfold = 4 - - // Esmfold links - esmfold_3B_v1 = null - esm2_t36_3B_UR50D = null - esm2_t36_3B_UR50D_contact_regression = null - - // Esmfold paths - esmfold_params_path = null - - // Foldseek params - foldseek_search = null - foldseek_easysearch_arg = null - - // Process skipping options - skip_multiqc = false - skip_visualisation = false - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - help_full = false - show_hidden = false - version = false - 
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - - // Config options - config_profile_name = null - config_profile_description = null - - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null - - // Schema validation default options - validate_params = true - -} - -// Load base.config by default for all pipelines -includeConfig 'conf/base.config' - -profiles { - debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false - nextflow.enable.configProcessNamesValidation = true - } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - conda.channels = ['conda-forge', 'bioconda'] - apptainer.enabled = false - } - mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - docker { - docker.enabled = true - if (params.use_gpu) { - docker.runOptions = '--gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g)' - } - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - arm { - if (params.use_gpu) { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - if (params.use_gpu) { singularity.runOptions = '--nv' } - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - 
podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - } - charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false - } - apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - wave { - apptainer.ociAutoPull = true - singularity.ociAutoPull = true - wave.enabled = true - wave.freeze = true - wave.strategy = 'conda,container' - } - gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB - } - test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } - test_colabfold_download { includeConfig 'conf/test_colabfold_download.config' } - test_esmfold { includeConfig 'conf/test_esmfold.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } - test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } - 
test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } - test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } - test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } - test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } -} - -// Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" - -// Load nf-core/proteinfold custom profiles from different institutions. -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" - -// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile -// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled -// Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' -charliecloud.registry = 'quay.io' - -// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
- -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} - -// Set bash options -process.shell = """\ -bash - -set -e # Exit if a tool returns a non-zero status/exit code -set -u # Treat unset variables and parameters as an error -set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute -set -C # No clobber - prevent output redirection from overwriting files. -""" - -// Disable process selector warnings by default. Use debug profile to enable warnings. -nextflow.enable.configProcessNamesValidation = false - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" -} - -manifest { - name = 'nf-core/proteinfold' - author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" - homePage = 'https://github.com/nf-core/proteinfold' - description = """Protein 3D structure prediction pipeline""" - mainScript = 'main.nf' - nextflowVersion = '!>=24.04.2' - version = '1.2.0dev' - doi = '10.5281/zenodo.7629996' -} - -// Nextflow plugins -plugins { - id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet -} - -validation { - defaultIgnoreParams = ["genomes"] - help { - enabled = true - command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ 
--\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m ${manifest.name} ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/${manifest.name}/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } -} - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Load modules config for pipeline specific modes -if (params.mode.toLowerCase().split(",").contains("alphafold2")) { - includeConfig 'conf/modules_alphafold2.config' -} -if (params.mode.toLowerCase().split(",").contains("colabfold")) { - includeConfig 'conf/modules_colabfold.config' -} -if (params.mode.toLowerCase().split(",").contains("esmfold")) { - includeConfig 'conf/modules_esmfold.config' -} - -// Load links to DBs and parameters -includeConfig 'conf/dbs.config' diff --git a/nextflow.config.2 b/nextflow.config.2 deleted file mode 100644 index 69cc7ad2..00000000 --- a/nextflow.config.2 +++ /dev/null @@ -1,339 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default config options for all compute environments 
----------------------------------------------------------------------------------------- -*/ - -// Global default params, used in configs -params { - - // Input options - input = null - mode = 'alphafold2' // {alphafold2, colabfold} - use_gpu = false - - // Alphafold2 parameters - alphafold2_mode = "standard" - max_template_date = "2020-05-14" - full_dbs = false // true full_dbs, false reduced_dbs - alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} - alphafold2_db = null - - // Database prefixes - bfd_prefix = null - smallbfd_prefix = null - mgnify_prefix = null - pdb70_prefix = null - pdb_mmcif_prefix = null - uniclust30_prefix = null - uniref90_prefix = null - pdb_seq_prefix = null - uniprot_prefix = null - alphafold_params_prefix = null - mmcif_path = null - mmcif_obsolete = null - uniclust30_db = null - bfd_first_non_consensus_sequences = null - uniprot_fasta = null - pdb_seqres_txt = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null - uniref90_fasta = null - mgy_clusters_fasta = null - uniclust30_prefix = null - - bfd_name = null - smallbfd_name = null - mgnify_name = null - pdb70_name = null - pdb_mmcif_name = null - uniclust30_name = null - uniref90_name = null - pdb_seqres_name = null - uniprot_name = null - alphafold_params_name = null - mmcif_files_name = null - mmcif_obsolete_name = null - uniclust30_db_name = null - bfd_first_non_consensus_sequences_name = null - uniprot_fasta_name = null - pdb_seqres_txt_name = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null - uniref90_fasta_name = null - mgy_clusters_fasta_name = null - - // Alphafold2 links - bfd = null - small_bfd = null - alphafold2_params = null - mgnify = null - pdb70 = null - pdb_mmcif = null - pdb_obsolete = null - uniclust30 = null - uniref90 = null - pdb_seqres = null - uniprot_sprot = null - uniprot_trembl = null - - // Alphafold2 paths - bfd_path = null - small_bfd_path = null - 
alphafold2_params_path = null - mgnify_path = null - pdb70_path = null - pdb_mmcif_path = null - uniclust30_path = null - uniref90_path = null - pdb_seqres_path = null - uniprot_path = null - - // Colabfold parameters - colabfold_server = "webserver" - colabfold_model_preset = "AlphaFold2-ptm" // {AlphaFold2-ptm,AlphaFold2-multimer-v1,AlphaFold2-multimer-v2} - num_recycle = 3 - use_amber = true - colabfold_db = null - db_load_mode = 0 - host_url = null - use_templates = true - create_colabfold_index = false - - // Colabfold links - colabfold_db_link = null - uniref30 = null - - // Colabfold paths - colabfold_db_path = null - uniref30_path = null - - // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' - multiqc_methods_description = null - - // Boilerplate options - outdir = null - tracedir = "${params.outdir}/pipeline_info" - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - - // Config options - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null - config_profile_contact = null - config_profile_url = null - config_profile_name = null - - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - -} - -spack.enabled = true - -// Load base.config by default for all pipelines -includeConfig 'conf/katana.config' - -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - 
-// Load nf-core/proteinfold custom profiles from different institutions. -try { - includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") -} - - -profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - docker { - docker.enabled = true - docker.userEmulation = true - if (params.use_gpu) { docker.runOptions = '--gpus all' } - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - arm { - if (params.use_gpu) { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64 --gpus all' - } else { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - } - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - if (params.use_gpu) { singularity.runOptions = '--nv' } - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - } - shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - } - charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - } - gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB - } - 
test { includeConfig 'conf/test.config' } - test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } - test_colabfold_local { includeConfig 'conf/test_colabfold_local.config' } - test_colabfold_webserver { includeConfig 'conf/test_colabfold_webserver.config' } - test_full { includeConfig 'conf/test_full.config' } - test_full_alphafold2_standard { includeConfig 'conf/test_full.config' } - test_full_alphafold2_split { includeConfig 'conf/test_full_alphafold_split.config' } - test_full_alphafold2_multimer { includeConfig 'conf/test_full_alphafold_multimer.config' } - test_full_colabfold_local { includeConfig 'conf/test_full_colabfold_local.config' } - test_full_colabfold_webserver { includeConfig 'conf/test_full_colabfold_webserver.config' } - test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } -} - -// Export these variables to prevent local Python/R libraries from conflicting with those in the container -// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. -// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
- -env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -timeline { - enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" -} -report { - enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" -} -trace { - enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" -} -dag { - enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" -} - -manifest { - name = 'nf-core/proteinfold' - author = """Athanasios Baltzis, Jose Espinosa-Carrasco, Harshil Patel""" - homePage = 'https://github.com/nf-core/proteinfold' - description = """Protein 3D structure prediction pipeline""" - mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '1.0.0' - doi = '10.5281/zenodo.7629996' -} - -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' - -// Load modules config for pipeline specific modes -if (params.mode == 'alphafold2') { - includeConfig 'conf/modules_alphafold2.config' -} else if (params.mode == 'colabfold') { - includeConfig 'conf/modules_colabfold.config' -} - -// Load links to DBs and parameters -includeConfig 'conf/dbs.config' - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} From 2eae3c147a28fd0233864614f246a859d3db3909 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 14:29:41 +1100 Subject: [PATCH 058/135] Ran nf-core schema build --- nextflow_schema.json | 496 ++++++++++++++++++++++++++++++------------- 1 file changed, 349 insertions(+), 147 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 17467d3f..d032ba02 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/nf-core/proteinfold/master/nextflow_schema.json", "title": "nf-core/proteinfold pipeline parameters", "description": "Protein 3D structure prediction pipeline", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Global options", "type": "object", @@ -32,8 +32,7 @@ "mode": { "type": "string", "default": "alphafold2", - "description": "Specifies the mode in which the pipeline will be run", - "enum": ["alphafold2", "colabfold", "esmfold", "rosettafold_all_atom"], + "description": "Specifies the mode in which the pipeline will be run. 
mode can be any combination of ['alphafold2', 'colabfold', 'esmfold'] separated by a comma (',') with no spaces.", "fa_icon": "fas fa-cogs" }, "use_gpu": { @@ -76,7 +75,6 @@ }, "full_dbs": { "type": "boolean", - "default": false, "description": "If true uses the full version of the BFD database otherwise, otherwise it uses its reduced version, small bfd", "fa_icon": "fas fa-battery-full" }, @@ -190,22 +188,40 @@ "type": "string", "description": "Specifies whether is a 'monomer' or 'multimer' prediction", "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "monomer" } } }, - "rosettafold_all_atom_options": { - "title": "RoseTTAFold_all_atom options", + "foldseek_options": { + "title": "Foldseek options", "type": "object", "fa_icon": "fas fa-coins", - "description": "RoseTTAFold_all_atom options", + "description": "Foldseek options.", "properties": { - "rosettafold_all_atom_db": { + "foldseek_search": { + "type": "string", + "enum": [null, "easysearch"], + "default": null, + "description": "Specifies the mode of foldseek search.", + "fa_icon": "fas fa-search" + }, + "foldseek_db": { + "type": "string", + "description": "The ID of Foldseek databases", + "fa_icon": "fas fa-server" + }, + "foldseek_db_path": { "type": "string", "format": "path", "exists": true, - "description": "Specifies the PARAMS path used by 'rosettafold_all_atom' mode", + "description": "Specifies the path to foldseek databases used by 'foldseek'.", "fa_icon": "fas fa-folder-open" + }, + "foldseek_easysearch_arg": { + "type": "string", + "description": "Specifies the arguments to be passed to foldseek easysearch command", + "fa_icon": "fas fa-server" } } }, @@ -219,6 +235,11 @@ "type": "boolean", "description": "Skip MultiQC.", "fa_icon": "fas fa-fast-forward" + }, + "skip_visualisation": { + "type": "boolean", + "description": "Skip visualisation reports.", + "fa_icon": "fas fa-fast-forward" } } }, @@ -270,41 +291,6 @@ } } }, - 
"max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" - } - } - }, "alphafold2_dbs_and_parameters_link_options": { "title": "Alphafold2 DBs and parameters links options", "type": "object", @@ -313,73 +299,61 @@ "properties": { "bfd_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz", "description": "Link to BFD dababase", "fa_icon": "fas fa-link" }, "small_bfd_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz", "description": "Link to a reduced version of the BFD dababase", "fa_icon": "fas fa-link" }, "alphafold2_params_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar", "description": "Link to the Alphafold2 parameters", "fa_icon": "fas fa-link" }, "mgnify_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz", "description": "Link to the MGnify database", "fa_icon": "fas fa-link" }, "pdb70_link": { "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz", "description": "Link to the PDB70 database", "fa_icon": "fas fa-link" }, "pdb_mmcif_link": { "type": "string", - "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/", "description": "Link to the PDB mmCIF database", "fa_icon": "fas fa-link" }, "pdb_obsolete_link": { "type": "string", - "default": "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat", "description": "Link to the PDB obsolete database", "fa_icon": "fas fa-link" }, "uniref30_alphafold2_link": { "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz", "description": "Link to the Uniclust30 database", "fa_icon": "fas fa-link" }, "uniref90_link": { "type": "string", - "default": 
"https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", "description": "Link to the UniRef90 database", "fa_icon": "fas fa-link" }, "pdb_seqres_link": { "type": "string", - "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", "description": "Link to the PDB SEQRES database", "fa_icon": "fas fa-link" }, "uniprot_sprot_link": { "type": "string", - "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", "description": "Link to the SwissProt UniProt database", "fa_icon": "fas fa-link" }, "uniprot_trembl_link": { "type": "string", - "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", "description": "Link to the TrEMBL UniProt database", "fa_icon": "fas fa-link" } @@ -394,52 +368,62 @@ "bfd_path": { "type": "string", "description": "Path to BFD dababase", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/bfd/*" }, "small_bfd_path": { "type": "string", "description": "Path to a reduced version of the BFD database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/smallbfd/*" }, "alphafold2_params_path": { "type": "string", "description": "Path to the Alphafold2 parameters", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/params/alphafold_params_*/*" }, "mgnify_path": { "type": "string", "description": "Path to the MGnify database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/mgnify/*" }, "pdb70_path": { "type": "string", "description": "Path to the PDB70 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb70/**" }, "pdb_mmcif_path": { "type": "string", "description": "Path to the PDB mmCIF database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": 
"null/pdb_mmcif/**" }, "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30/**" }, "uniref90_path": { "type": "string", "description": "Path to the UniRef90 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref90/*" }, "pdb_seqres_path": { "type": "string", "description": "Path to the PDB SEQRES database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb_seqres/*" }, "uniprot_path": { "type": "string", "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniprot/*" } } }, @@ -477,12 +461,14 @@ "colabfold_db_path": { "type": "string", "description": "Link to the Colabfold database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/colabfold_envdb_202108" }, "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30_2302" }, "colabfold_alphafold2_params_path": { "type": "string", @@ -490,9 +476,10 @@ "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_tags": { - "type": "string", + "type": "object", "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" } } }, @@ -531,35 +518,8 @@ "esmfold_params_path": { "type": "string", "description": "Link to the Esmfold parameters", - "fa_icon": "fas fa-folder-open" - } - } - }, - 
"rosettafold_all_atom_dbs_and_parameters_path_options": { - "title": "RosettaFold_All_Atom DBs and parameters path options", - "type": "object", - "description": "Parameters used to provide paths to the databases and parameters for RosettaFold_All_Atom.", - "fa_icon": "fas fa-database", - "properties": { - "uniref30_rosettafold_all_atom_path": { - "type": "string", - "description": "Path to the UniRef30 database for RosettaFold_All_Atom", - "fa_icon": "fas fa-folder-open" - }, - "blast_path": { - "type": "string", - "description": "Path to the BLAST database for RosettaFold_All_Atom", - "fa_icon": "fas fa-folder-open" - }, - "pdb100_path": { - "type": "string", - "description": "Path to the PDB100 database for RosettaFold_All_Atom", - "fa_icon": "fas fa-folder-open" - }, - "RFAA_paper_weights_path": { - "type": "string", - "description": "Path to the weights file used in the RFAA paper for RosettaFold_All_Atom", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/*" } } }, @@ -570,12 +530,6 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -659,27 +613,6 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with 
`--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - }, "pipelines_testdata_base_path": { "type": "string", "fa_icon": "far fa-check-circle", @@ -692,52 +625,321 @@ }, "allOf": [ { - "$ref": "#/definitions/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/definitions/alphafold2_options" + "$ref": "#/$defs/alphafold2_options" }, { - "$ref": "#/definitions/colabfold_options" + "$ref": "#/$defs/colabfold_options" }, { - "$ref": "#/definitions/esmfold_options" + "$ref": "#/$defs/esmfold_options" }, { - "$ref": "#/definitions/rosettafold_all_atom_options" + "$ref": "#/$defs/foldseek_options" }, { - "$ref": "#/definitions/process_skipping_options" + "$ref": "#/$defs/process_skipping_options" }, { - "$ref": "#/definitions/institutional_config_options" + "$ref": "#/$defs/institutional_config_options" }, { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/$defs/alphafold2_dbs_and_parameters_link_options" }, { - "$ref": "#/definitions/alphafold2_dbs_and_parameters_link_options" + "$ref": "#/$defs/alphafold2_dbs_and_parameters_path_options" }, { - "$ref": "#/definitions/alphafold2_dbs_and_parameters_path_options" + "$ref": "#/$defs/colabfold_dbs_and_parameters_link_options" }, { - "$ref": "#/definitions/colabfold_dbs_and_parameters_link_options" + "$ref": 
"#/$defs/colabfold_dbs_and_parameters_path_options" }, { - "$ref": "#/definitions/colabfold_dbs_and_parameters_path_options" + "$ref": "#/$defs/esmfold_parameters_link_options" }, { - "$ref": "#/definitions/esmfold_parameters_link_options" + "$ref": "#/$defs/esmfold_parameters_path_options" }, { - "$ref": "#/definitions/esmfold_parameters_path_options" + "$ref": "#/$defs/generic_options" + } + ], + "properties": { + "bfd_prefix": { + "type": "string" }, - { - "$ref": "#/definitions/rosettafold_all_atom_dbs_and_parameters_path_options" + "smallbfd_prefix": { + "type": "string" }, - { - "$ref": "#/definitions/generic_options" + "mgnify_prefix": { + "type": "string" + }, + "pdb70_prefix": { + "type": "string" + }, + "pdb_mmcif_prefix": { + "type": "string" + }, + "uniref30_prefix": { + "type": "string" + }, + "uniref90_prefix": { + "type": "string" + }, + "pdb_seq_prefix": { + "type": "string" + }, + "uniprot_prefix": { + "type": "string" + }, + "alphafold_params_prefix": { + "type": "string" + }, + "mmcif_path": { + "type": "string" + }, + "mmcif_obsolete": { + "type": "string" + }, + "uniref30_db": { + "type": "string" + }, + "bfd_first_non_consensus_sequences": { + "type": "string" + }, + "uniprot_fasta": { + "type": "string" + }, + "pdb_seqres_txt": { + "type": "string" + }, + "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt": { + "type": "string" + }, + "uniref90_fasta": { + "type": "string" + }, + "mgy_clusters_fasta": { + "type": "string" + }, + "bfd_name": { + "type": "string", + "default": "bfd" + }, + "smallbfd_name": { + "type": "string", + "default": "smallbfd" + }, + "mgnify_name": { + "type": "string", + "default": "mgnify" + }, + "pdb70_name": { + "type": "string", + "default": "pdb70" + }, + "pdb_mmcif_name": { + "type": "string", + "default": "pdb_mmcif" + }, + "uniref30_name": { + "type": "string", + "default": "uniref30" + }, + "uniref90_name": { + "type": "string", + "default": "uniref90" + }, + "pdb_seqres_name": { + "type": "string", + 
"default": "pdb_seqres" + }, + "uniprot_name": { + "type": "string", + "default": "uniprot" + }, + "alphafold_params_name": { + "type": "string", + "default": "params/alphafold_params_*" + }, + "mmcif_files_name": { + "type": "string", + "default": "pdb_mmcif/mmcif_files/" + }, + "mmcif_obsolete_name": { + "type": "string", + "default": "pdb_mmcif/obsolete.dat" + }, + "uniref30_db_name": { + "type": "string", + "default": "uniref30_2018_08" + }, + "bfd_first_non_consensus_sequences_name": { + "type": "string", + "default": "bfd-first_non_consensus_sequences.fasta" + }, + "uniprot_fasta_name": { + "type": "string", + "default": "uniprot.fasta" + }, + "pdb_seqres_txt_name": { + "type": "string", + "default": "pdb_seqres.txt" + }, + "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name": { + "type": "string", + "default": "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" + }, + "uniref90_fasta_name": { + "type": "string", + "default": "uniref90.fasta" + }, + "mgy_clusters_fasta_name": { + "type": "string", + "default": "mgy_clusters_2022_05.fa" + }, + "rosettafold_all_atom_db": { + "type": "string" + }, + "uniref30_rosettafold_all_atom_path": { + "type": "string" + }, + "blast_path": { + "type": "string", + "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" + }, + "pdb100_path": { + "type": "string", + "default": "null/pdb100/" + }, + "RFAA_paper_weights_path": { + "type": "string" + }, + "cpuQueue": { + "type": "string", + "default": "submission" + }, + "gpuQueue": { + "type": "string", + "default": "mwacgpu2" + }, + "bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" + }, + "small_bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" + }, + "alphafold2_params": { + "type": "string", + "default": 
"https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" + }, + "mgnify": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" + }, + "pdb70": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" + }, + "pdb_mmcif": { + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" + }, + "pdb_obsolete": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" + }, + "uniref30": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" + }, + "uniref90": { + "type": "string", + "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" + }, + "pdb_seqres": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" + }, + "uniprot_sprot": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" + }, + "uniprot_trembl": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" + }, + "bfd_dir_path": { + "type": "string", + "default": "null/bfd/" + }, + "small_bfd_dir_path": { + "type": "string", + "default": "null/smallbfd/" + }, + "mgnify_dir_path": { + "type": "string", + "default": "null/mgnify/" + }, + "pdb70_dir_path": { + "type": "string", + "default": "null/pdb70/" + }, + "pdb_mmcif_dir_path": { + "type": "string", + "default": "null/pdb_mmcif/" + }, + "uniref30_dir_path": { + "type": "string", + "default": "null/uniref30/" + }, + "uniref90_dir_path": { + "type": "string", + "default": "null/uniref90/" + }, + "pdb_seqres_dir_path": { + "type": "string", + "default": "null/pdb_seqres/" + }, + "uniprot_dir_path": { + "type": 
"string", + "default": "null/uniprot/" + }, + "mgnify_database_path": { + "type": "string", + "default": "null/mgnify/" + }, + "template_mmcif_dir": { + "type": "string", + "default": "null/pdb_mmcif/mmcif_files//" + }, + "obsolete_pdbs_path": { + "type": "string", + "default": "null/pdb_mmcif/obsolete.dat" + }, + "pdb100": { + "type": "string", + "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" + }, + "RFAA_paper_weights": { + "type": "string", + "default": "http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt" + }, + "uniref30_variable": { + "type": "string", + "default": "null/uniref30/" + }, + "bfd_variable": { + "type": "string", + "default": "null/bfd/" + }, + "RFAA_paper_weights_variable": { + "type": "string" } - ] + } } From f07c612633e0eb5e43ab348af65746fe6f1098eb Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 14:38:47 +1100 Subject: [PATCH 059/135] Ran nf-core schema build --- nextflow_schema.json | 901 +++++++++++++------------------------------ 1 file changed, 264 insertions(+), 637 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index ec91e406..c7584d93 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -6,9 +6,9 @@ "type": "object", "$defs": { "input_output_options": { - "title": "Global options", + "title": "Input/output options", "type": "object", - "fa_icon": "fas fa-coins", + "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "required": ["input", "outdir"], "properties": { @@ -20,7 +20,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. 
It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -28,530 +28,6 @@ "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" - }, - "mode": { - "type": "string", - "default": "alphafold2", - "description": "Specifies the mode in which the pipeline will be run. mode can be any combination of ['alphafold2', 'colabfold', 'esmfold'] separated by a comma (',') with no spaces.", - "fa_icon": "fas fa-cogs" - }, - "use_gpu": { - "type": "boolean", - "description": "Run on CPUs (default) or GPUs", - "fa_icon": "fas fa-microchip" - }, - "email": { - "type": "string", - "description": "Email address for completion summary.", - "fa_icon": "fas fa-envelope", - "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" - } - } - }, - "alphafold2_options": { - "title": "Alphafold2 options", - "type": "object", - "fa_icon": "fas fa-dna", - "description": "Alphafold2 options.", - "properties": { - "max_template_date": { - "type": "string", - "default": "2020-05-14", - "description": "Maximum date of the PDB templates used by 'AlphaFold2' mode", - "fa_icon": "fas fa-calendar-check" - }, - "alphafold2_db": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode", - "fa_icon": "fas fa-database" - }, - "full_dbs": { - "type": "boolean", - "description": "If true uses the full version of the BFD database otherwise, otherwise it uses its reduced version, small bfd", - "fa_icon": "fas fa-battery-full" - }, - "alphafold2_mode": { - "type": "string", - "default": "standard", - "description": "Specifies the mode in which Alphafold2 will be run", - "enum": ["standard", "split_msa_prediction"], - "fa_icon": "fas fa-exchange-alt" - }, - "alphafold2_model_preset": { - "type": "string", - "default": "monomer", - "description": "Model preset for 'AlphaFold2' mode", - "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], - "fa_icon": "fas fa-stream" - } - } - }, - "colabfold_options": { - "title": "Colabfold options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Colabfold options.", - "properties": { - "colabfold_db": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the PARAMS and DB path used by 'colabfold' mode", - "fa_icon": "fas fa-folder-open" - }, - "colabfold_server": { - "type": "string", - "default": "webserver", - "description": "Specifies the MSA server used by Colabfold", - "enum": ["webserver", "local"], - "fa_icon": "fas fa-server" - }, - "colabfold_model_preset": { - "type": "string", - "default": "alphafold2_ptm", - 
"description": "Model preset for 'colabfold' mode", - "enum": [ - "auto", - "alphafold2", - "alphafold2_ptm", - "alphafold2_multimer_v1", - "alphafold2_multimer_v2", - "alphafold2_multimer_v3" - ], - "fa_icon": "fas fa-stream" - }, - "num_recycles_colabfold": { - "type": "integer", - "default": 3, - "description": "Number of recycles for Colabfold", - "fa_icon": "fas fa-recycle" - }, - "use_amber": { - "type": "boolean", - "default": true, - "description": "Use Amber minimization to refine the predicted structures", - "fa_icon": "fas fa-compress-alt" - }, - "db_load_mode": { - "type": "integer", - "default": 0, - "description": "Specify the way that MMSeqs2 will load the required databases in memory", - "fa_icon": "fas fa-download", - "enum": [0, 1, 2, 3] - }, - "host_url": { - "type": "string", - "description": "Specify your custom MMSeqs2 API server url", - "fa_icon": "fas fa-link" - }, - "use_templates": { - "type": "boolean", - "default": true, - "description": "Use PDB templates", - "fa_icon": "fas fa-paste" - }, - "create_colabfold_index": { - "type": "boolean", - "description": "Create databases indexes when running colabfold_local mode", - "fa_icon": "fas fa-bezier-curve" - } - } - }, - "esmfold_options": { - "title": "Esmfold options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Esmfold options.", - "properties": { - "esmfold_db": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the PARAMS path used by 'esmfold' mode", - "fa_icon": "fas fa-folder-open" - }, - "num_recycles_esmfold": { - "type": "integer", - "default": 4, - "description": "Specifies the number of recycles used by Esmfold", - "fa_icon": "fas fa-server" - }, - "esmfold_model_preset": { - "type": "string", - "description": "Specifies whether is a 'monomer' or 'multimer' prediction", - "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream", - "default": "monomer" - } - } - }, - "foldseek_options": { - "title": "Foldseek 
options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Foldseek options.", - "properties": { - "foldseek_search": { - "type": "string", - "enum": [null, "easysearch"], - "default": null, - "description": "Specifies the mode of foldseek search.", - "fa_icon": "fas fa-search" - }, - "foldseek_db": { - "type": "string", - "description": "The ID of Foldseek databases", - "fa_icon": "fas fa-server" - }, - "foldseek_db_path": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the path to foldseek databases used by 'foldseek'.", - "fa_icon": "fas fa-folder-open" - }, - "foldseek_easysearch_arg": { - "type": "string", - "description": "Specifies the arguments to be passed to foldseek easysearch command", - "fa_icon": "fas fa-server" - } - } - }, - "foldseek_options": { - "title": "Foldseek options", - "type": "object", - "fa_icon": "fas fa-coins", - "description": "Foldseek options.", - "properties": { - "foldseek_search": { - "type": "string", - "enum": [null, "easysearch"], - "default": null, - "description": "Specifies the mode of foldseek search.", - "fa_icon": "fas fa-search" - }, - "foldseek_db": { - "type": "string", - "description": "The ID of Foldseek databases", - "fa_icon": "fas fa-server" - }, - "foldseek_db_path": { - "type": "string", - "format": "path", - "exists": true, - "description": "Specifies the path to foldseek databases used by 'foldseek'.", - "fa_icon": "fas fa-folder-open" - }, - "foldseek_easysearch_arg": { - "type": "string", - "description": "Specifies the arguments to be passed to foldseek easysearch command", - "fa_icon": "fas fa-server" - } - } - }, - "process_skipping_options": { - "title": "Process skipping options", - "type": "object", - "fa_icon": "fas fa-fast-forward", - "description": "Options to skip various steps within the workflow.", - "properties": { - "skip_multiqc": { - "type": "boolean", - "description": "Skip MultiQC.", - "fa_icon": "fas fa-fast-forward" - }, - 
"skip_visualisation": { - "type": "boolean", - "description": "Skip visualisation reports.", - "fa_icon": "fas fa-fast-forward" - } - } - }, - "institutional_config_options": { - "title": "Institutional config options", - "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", - "properties": { - "custom_config_version": { - "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "custom_config_base": { - "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" - }, - "config_profile_name": { - "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_description": { - "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, - "config_profile_url": { - "type": "string", - "description": "Institutional config URL link.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - } - } - }, - "alphafold2_dbs_and_parameters_link_options": { - "title": "Alphafold2 DBs and parameters links options", - "type": "object", - "fa_icon": "fas fa-database", - "description": "Parameters used to provide the links to the DBs and parameters public resources to Alphafold2.", - "properties": { - "bfd_link": { - "type": "string", - "description": "Link to BFD dababase", - "fa_icon": "fas fa-link" - }, - "small_bfd_link": { - "type": "string", - "description": "Link to a reduced version of the BFD dababase", - "fa_icon": "fas fa-link" - }, - "alphafold2_params_link": { - "type": "string", - "description": "Link to the Alphafold2 parameters", - "fa_icon": "fas fa-link" - }, - "mgnify_link": { - "type": "string", - "description": "Link to the MGnify database", - "fa_icon": "fas fa-link" - }, - "pdb70_link": { - "type": "string", - "description": "Link to the PDB70 database", - "fa_icon": "fas fa-link" - }, - "pdb_mmcif_link": { - "type": "string", - "description": "Link to the PDB mmCIF database", - "fa_icon": "fas fa-link" - }, - "pdb_obsolete_link": { - "type": "string", - "description": "Link to the PDB obsolete database", - "fa_icon": "fas fa-link" - }, - 
"uniref30_alphafold2_link": { - "type": "string", - "description": "Link to the Uniclust30 database", - "fa_icon": "fas fa-link" - }, - "uniref90_link": { - "type": "string", - "description": "Link to the UniRef90 database", - "fa_icon": "fas fa-link" - }, - "pdb_seqres_link": { - "type": "string", - "description": "Link to the PDB SEQRES database", - "fa_icon": "fas fa-link" - }, - "uniprot_sprot_link": { - "type": "string", - "description": "Link to the SwissProt UniProt database", - "fa_icon": "fas fa-link" - }, - "uniprot_trembl_link": { - "type": "string", - "description": "Link to the TrEMBL UniProt database", - "fa_icon": "fas fa-link" - } - } - }, - "alphafold2_dbs_and_parameters_path_options": { - "title": "Alphafold2 DBs and parameters links options", - "type": "object", - "fa_icon": "fas fa-database", - "description": "Parameters used to provide the paths to the DBs and parameters for Alphafold2.", - "properties": { - "bfd_path": { - "type": "string", - "description": "Path to BFD dababase", - "fa_icon": "fas fa-folder-open", - "default": "null/bfd/*" - }, - "small_bfd_path": { - "type": "string", - "description": "Path to a reduced version of the BFD database", - "fa_icon": "fas fa-folder-open", - "default": "null/smallbfd/*" - }, - "alphafold2_params_path": { - "type": "string", - "description": "Path to the Alphafold2 parameters", - "fa_icon": "fas fa-folder-open", - "default": "null/params/alphafold_params_*/*" - }, - "mgnify_path": { - "type": "string", - "description": "Path to the MGnify database", - "fa_icon": "fas fa-folder-open", - "default": "null/mgnify/*" - }, - "pdb70_path": { - "type": "string", - "description": "Path to the PDB70 database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb70/**" - }, - "pdb_mmcif_path": { - "type": "string", - "description": "Path to the PDB mmCIF database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb_mmcif/**" - }, - "uniref30_alphafold2_path": { - "type": "string", - "description": 
"Path to the Uniref30 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref30/**" - }, - "uniref90_path": { - "type": "string", - "description": "Path to the UniRef90 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref90/*" - }, - "pdb_seqres_path": { - "type": "string", - "description": "Path to the PDB SEQRES database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb_seqres/*" - }, - "uniprot_path": { - "type": "string", - "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", - "fa_icon": "fas fa-folder-open", - "default": "null/uniprot/*" - } - } - }, - "colabfold_dbs_and_parameters_link_options": { - "title": "Colabfold DBs and parameters links options", - "type": "object", - "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", - "fa_icon": "fas fa-database", - "properties": { - "colabfold_db_link": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz", - "description": "Link to the Colabfold database", - "fa_icon": "fas fa-link" - }, - "uniref30_colabfold_link": { - "type": "string", - "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz", - "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-link" - }, - "colabfold_alphafold2_params_link": { - "type": "string", - "description": "Link to the Alphafold2 parameters for Colabfold", - "fa_icon": "fas fa-link" - } - } - }, - "colabfold_dbs_and_parameters_path_options": { - "title": "Colabfold DBs and parameters links options", - "type": "object", - "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", - "fa_icon": "fas fa-database", - "properties": { - "colabfold_db_path": { - "type": "string", - "description": "Link to the Colabfold database", - "fa_icon": "fas fa-folder-open", - "default": 
"null/colabfold_envdb_202108" - }, - "uniref30_colabfold_path": { - "type": "string", - "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref30_2302" - }, - "colabfold_alphafold2_params_path": { - "type": "string", - "description": "Link to the Alphafold2 parameters for Colabfold", - "fa_icon": "fas fa-folder-open" - }, - "colabfold_alphafold2_params_tags": { - "type": "object", - "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream", - "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" - } - } - }, - "esmfold_parameters_link_options": { - "title": "Esmfold parameters links options", - "type": "object", - "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", - "fa_icon": "fas fa-database", - "properties": { - "esmfold_3B_v1": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt", - "description": "Link to the Esmfold 3B-v1 model", - "fa_icon": "fas fa-link" - }, - "esm2_t36_3B_UR50D": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt", - "description": "Link to the Esmfold t36-3B-UR50D model", - "fa_icon": "fas fa-link" - }, - "esm2_t36_3B_UR50D_contact_regression": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt", - "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model", - "fa_icon": "fas fa-link" - } - } - }, - "esmfold_parameters_path_options": { - "title": "Esmfold parameters links options", - "type": "object", - "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", - "fa_icon": "fas 
fa-database", - "properties": { - "esmfold_params_path": { - "type": "string", - "description": "Link to the Esmfold parameters", - "fa_icon": "fas fa-folder-open", - "default": "null/*" } } }, @@ -568,89 +44,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, - "email_on_fail": { - "type": "string", - "description": "Email address for completion summary, only when pipeline fails.", - "fa_icon": "fas fa-exclamation-triangle", - "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true - }, - "plaintext_email": { - "type": "boolean", - "description": "Send plain-text email instead of HTML.", - "fa_icon": "fas fa-remove-format", - "hidden": true - }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, - "monochrome_logs": { - "type": "boolean", - "description": "Do not use coloured log outputs.", - "fa_icon": "fas fa-palette", - "hidden": true - }, - "hook_url": { - "type": "string", - "description": "Incoming hook URL for messaging service", - "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook 
URL for messaging service. Currently, MS Teams and Slack are supported.", - "hidden": true - }, - "multiqc_config": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", - "hidden": true - }, - "multiqc_logo": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", - "fa_icon": "fas fa-image", - "hidden": true - }, - "multiqc_methods_description": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "description": "Custom MultiQC yaml file containing HTML including a methods description.", - "fa_icon": "fas fa-cog" - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", "hidden": true - }, - "pipelines_testdata_base_path": { - "type": "string", - "fa_icon": "far fa-check-circle", - "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", - "hidden": true } } } @@ -660,46 +59,35 @@ "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/$defs/alphafold2_options" - }, - { - "$ref": "#/$defs/colabfold_options" - }, - { - "$ref": "#/$defs/esmfold_options" - }, - { - "$ref": "#/$defs/foldseek_options" - }, - { - "$ref": "#/$defs/process_skipping_options" - }, - { - "$ref": "#/$defs/institutional_config_options" + "$ref": "#/$defs/generic_options" + } + ], + "properties": { + "mode": { + "type": "string", + "default": "alphafold2" }, - { - "$ref": "#/$defs/alphafold2_dbs_and_parameters_link_options" + "use_gpu": { + "type": "boolean" }, - { - "$ref": "#/$defs/alphafold2_dbs_and_parameters_path_options" + "alphafold2_mode": { + "type": 
"string", + "default": "standard" }, - { - "$ref": "#/$defs/colabfold_dbs_and_parameters_link_options" + "max_template_date": { + "type": "string", + "default": "2020-05-14" }, - { - "$ref": "#/$defs/colabfold_dbs_and_parameters_path_options" + "full_dbs": { + "type": "boolean" }, - { - "$ref": "#/$defs/esmfold_parameters_link_options" + "alphafold2_model_preset": { + "type": "string", + "default": "monomer" }, - { - "$ref": "#/$defs/esmfold_parameters_path_options" + "alphafold2_db": { + "type": "string" }, - { - "$ref": "#/$defs/generic_options" - } - ], - "properties": { "bfd_prefix": { "type": "string" }, @@ -833,6 +221,158 @@ "type": "string", "default": "mgy_clusters_2022_05.fa" }, + "bfd_link": { + "type": "string" + }, + "small_bfd_link": { + "type": "string" + }, + "alphafold2_params_link": { + "type": "string" + }, + "mgnify_link": { + "type": "string" + }, + "pdb70_link": { + "type": "string" + }, + "pdb_mmcif_link": { + "type": "string" + }, + "pdb_obsolete_link": { + "type": "string" + }, + "uniref30_alphafold2_link": { + "type": "string" + }, + "uniref90_link": { + "type": "string" + }, + "pdb_seqres_link": { + "type": "string" + }, + "uniprot_sprot_link": { + "type": "string" + }, + "uniprot_trembl_link": { + "type": "string" + }, + "bfd_path": { + "type": "string", + "default": "null/bfd/*" + }, + "small_bfd_path": { + "type": "string", + "default": "null/smallbfd/*" + }, + "alphafold2_params_path": { + "type": "string", + "default": "null/params/alphafold_params_*/*" + }, + "mgnify_path": { + "type": "string", + "default": "null/mgnify/*" + }, + "pdb70_path": { + "type": "string", + "default": "null/pdb70/**" + }, + "pdb_mmcif_path": { + "type": "string", + "default": "null/pdb_mmcif/**" + }, + "uniref30_alphafold2_path": { + "type": "string", + "default": "null/uniref30/**" + }, + "uniref90_path": { + "type": "string", + "default": "null/uniref90/*" + }, + "pdb_seqres_path": { + "type": "string", + "default": "null/pdb_seqres/*" + }, + 
"uniprot_path": { + "type": "string", + "default": "null/uniprot/*" + }, + "colabfold_server": { + "type": "string", + "default": "webserver" + }, + "colabfold_model_preset": { + "type": "string", + "default": "alphafold2_ptm" + }, + "num_recycles_colabfold": { + "type": "integer", + "default": 3 + }, + "use_amber": { + "type": "boolean", + "default": true + }, + "colabfold_db": { + "type": "string" + }, + "db_load_mode": { + "type": "integer", + "default": 0 + }, + "host_url": { + "type": "string" + }, + "use_templates": { + "type": "boolean", + "default": true + }, + "create_colabfold_index": { + "type": "boolean" + }, + "colabfold_db_link": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz" + }, + "uniref30_colabfold_link": { + "type": "string", + "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz" + }, + "colabfold_db_path": { + "type": "string", + "default": "null/colabfold_envdb_202108" + }, + "uniref30_colabfold_path": { + "type": "string", + "default": "null/uniref30_2302" + }, + "esmfold_db": { + "type": "string" + }, + "esmfold_model_preset": { + "type": "string", + "default": "monomer" + }, + "num_recycles_esmfold": { + "type": "integer", + "default": 4 + }, + "esmfold_3B_v1": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt" + }, + "esm2_t36_3B_UR50D": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt" + }, + "esm2_t36_3B_UR50D_contact_regression": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt" + }, + "esmfold_params_path": { + "type": "string", + "default": "null/*" + }, "rosettafold_all_atom_db": { "type": "string" }, @@ -850,6 +390,77 @@ "RFAA_paper_weights_path": { "type": "string" }, + "foldseek_search": { + "type": "string" + }, + "foldseek_easysearch_arg": { + "type": "string" + 
}, + "skip_multiqc": { + "type": "boolean" + }, + "skip_visualisation": { + "type": "boolean" + }, + "multiqc_config": { + "type": "string" + }, + "multiqc_title": { + "type": "string" + }, + "multiqc_logo": { + "type": "string" + }, + "max_multiqc_email_size": { + "type": "string", + "default": "25.MB" + }, + "multiqc_methods_description": { + "type": "string" + }, + "publish_dir_mode": { + "type": "string", + "default": "copy" + }, + "email": { + "type": "string" + }, + "email_on_fail": { + "type": "string" + }, + "plaintext_email": { + "type": "boolean" + }, + "monochrome_logs": { + "type": "boolean" + }, + "hook_url": { + "type": "string" + }, + "pipelines_testdata_base_path": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/" + }, + "config_profile_name": { + "type": "string" + }, + "config_profile_description": { + "type": "string" + }, + "custom_config_version": { + "type": "string", + "default": "master" + }, + "custom_config_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/configs/master" + }, + "config_profile_contact": { + "type": "string" + }, + "config_profile_url": { + "type": "string" + }, "cpuQueue": { "type": "string", "default": "submission" @@ -954,6 +565,10 @@ "type": "string", "default": "null/pdb_mmcif/obsolete.dat" }, + "colabfold_alphafold2_params_tags": { + "type": "string", + "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" + }, "pdb100": { "type": "string", "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" @@ -972,6 +587,18 @@ }, "RFAA_paper_weights_variable": { "type": "string" + }, + "foldseek_db": { + "type": "string" + }, + "foldseek_db_path": { + "type": "string" + }, + "colabfold_alphafold2_params_link": { + "type": "string" + }, + 
"colabfold_alphafold2_params_path": { + "type": "string" } } } From 637d67c257622dcb76d275bd3582391cf4223b51 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 15:00:17 +1100 Subject: [PATCH 060/135] Dealing with permissions --- .github/.dockstore.yml | 6 - .github/CONTRIBUTING.md | 125 ------------------ .github/ISSUE_TEMPLATE/bug_report.yml | 50 ------- .github/ISSUE_TEMPLATE/config.yml | 7 - .github/ISSUE_TEMPLATE/feature_request.yml | 11 -- .github/PULL_REQUEST_TEMPLATE.md | 26 ---- .github/workflows/awsfulltest.yml | 68 ---------- .github/workflows/awstest.yml | 33 ----- .github/workflows/branch.yml | 44 ------ .github/workflows/ci.yml | 104 --------------- .github/workflows/clean-up.yml | 24 ---- .github/workflows/download_pipeline.yml | 119 ----------------- .github/workflows/fix-linting.yml | 89 ------------- .github/workflows/linting.yml | 83 ------------ .github/workflows/linting_comment.yml | 28 ---- .github/workflows/release-announcements.yml | 75 ----------- .../workflows/template_version_comment.yml | 46 ------- 17 files changed, 938 deletions(-) delete mode 100644 .github/.dockstore.yml delete mode 100644 .github/CONTRIBUTING.md delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml delete mode 100644 .github/PULL_REQUEST_TEMPLATE.md delete mode 100644 .github/workflows/awsfulltest.yml delete mode 100644 .github/workflows/awstest.yml delete mode 100644 .github/workflows/branch.yml delete mode 100644 .github/workflows/ci.yml delete mode 100644 .github/workflows/clean-up.yml delete mode 100644 .github/workflows/download_pipeline.yml delete mode 100644 .github/workflows/fix-linting.yml delete mode 100644 .github/workflows/linting.yml delete mode 100644 .github/workflows/linting_comment.yml delete mode 100644 .github/workflows/release-announcements.yml delete mode 100644 .github/workflows/template_version_comment.yml diff --git 
a/.github/.dockstore.yml b/.github/.dockstore.yml deleted file mode 100644 index 191fabd2..00000000 --- a/.github/.dockstore.yml +++ /dev/null @@ -1,6 +0,0 @@ -# Dockstore config version, not pipeline version -version: 1.2 -workflows: - - subclass: nfl - primaryDescriptorPath: /nextflow.config - publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md deleted file mode 100644 index 5d64f953..00000000 --- a/.github/CONTRIBUTING.md +++ /dev/null @@ -1,125 +0,0 @@ -# nf-core/proteinfold: Contributing Guidelines - -Hi there! -Many thanks for taking an interest in improving nf-core/proteinfold. - -We try to manage the required tasks for nf-core/proteinfold using GitHub issues, you probably came to this page when creating one. -Please use the pre-filled template to save time. - -However, don't be put off by this template - other more general issues and suggestions are welcome! -Contributions to the code are even more welcome ;) - -> [!NOTE] -> If you need help using or modifying nf-core/proteinfold then the best place to ask is on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)). - -## Contribution workflow - -If you'd like to write some code for nf-core/proteinfold, the standard workflow is as follows: - -1. Check that there isn't already an issue about your idea in the [nf-core/proteinfold issues](https://github.com/nf-core/proteinfold/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this -2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteinfold repository](https://github.com/nf-core/proteinfold) to your GitHub account -3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. 
Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). -5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged - -If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). - -## Tests - -You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: - -```bash -nextflow run . --profile debug,test,docker --outdir -``` - -When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. -Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. - -There are typically two types of tests that run: - -### Lint tests - -`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint ` command. - -If any failures or warnings are encountered, please follow the listed URL for more documentation. - -### Pipeline tests - -Each `nf-core` pipeline should be set up with a minimal set of test-data. -`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. -If there are any failures then the automated tests fail. 
-These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. - -## Patch - -:warning: Only in the unlikely and regretful event of a release happening with a bug. - -- On your own fork, make a new branch `patch` based on `upstream/master`. -- Fix the bug, and bump version (X.Y.Z+1). -- A PR should be made on `master` from patch to directly this particular bug. - -## Getting help - -For further information/help, please consult the [nf-core/proteinfold documentation](https://nf-co.re/proteinfold/usage) and don't hesitate to get in touch on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)). - -## Pipeline contribution conventions - -To make the nf-core/proteinfold code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. - -### Adding a new step - -If you wish to contribute a new step, please use the following coding standards: - -1. Define the corresponding input channel into your new process from the expected previous process channel -2. Write the process block (see below). -3. Define the output channel if needed (see below). -4. Add any new parameters to `nextflow.config` with a default (see below). -5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). -6. Add sanity checks and validation for all relevant parameters. -7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. -10. 
Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. - -### Default values - -Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. - -Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. - -### Default processes resource requirements - -Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. - -The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. - -### Naming schemes - -Please use the following naming schemes, to make it easy to understand what is going where. - -- initial process channel: `ch_output_from_` -- intermediate and terminal channels: `ch__for_` - -### Nextflow version bumping - -If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` - -### Images and figures - -For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). - -## GitHub Codespaces - -This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! 
This is an online developer environment that runs in your browser, complete with VSCode and a terminal. - -To get started: - -- Open the repo in [Codespaces](https://github.com/nf-core/proteinfold/codespaces) -- Tools installed - - nf-core - - Nextflow - -Devcontainer specs: - -- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index 257da826..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Bug report -description: Report something that is broken or incorrect -labels: bug -body: - - type: markdown - attributes: - value: | - Before you post this issue, please check the documentation: - - - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) - - [nf-core/proteinfold pipeline documentation](https://nf-co.re/proteinfold/usage) - - - type: textarea - id: description - attributes: - label: Description of the bug - description: A clear and concise description of what the bug is. - validations: - required: true - - - type: textarea - id: command_used - attributes: - label: Command used and terminal output - description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. - render: console - placeholder: | - $ nextflow run ... - - Some output where something broke - - - type: textarea - id: files - attributes: - label: Relevant files - description: | - Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. - - - type: textarea - id: system - attributes: - label: System information - description: | - * Nextflow version _(eg. 23.04.0)_ - * Hardware _(eg. HPC, Desktop, Cloud)_ - * Executor _(eg. 
slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - * Version of nf-core/proteinfold _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 06ed5d1c..00000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,7 +0,0 @@ -contact_links: - - name: Join nf-core - url: https://nf-co.re/join - about: Please join the nf-core community here - - name: "Slack #proteinfold channel" - url: https://nfcore.slack.com/channels/proteinfold - about: Discussion about the nf-core/proteinfold pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index 545e2122..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: Feature request -description: Suggest an idea for the nf-core/proteinfold pipeline -labels: enhancement -body: - - type: textarea - id: description - attributes: - label: Description of feature - description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. - validations: - required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 992c391e..00000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,26 +0,0 @@ - - -## PR checklist - -- [ ] This comment contains a description of changes (with reason). -- [ ] If you've fixed a bug or added code that should be tested, add tests! 
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md) -- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. -- [ ] Make sure your code lints (`nf-core pipelines lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). -- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). -- [ ] Usage Documentation in `docs/usage.md` is updated. -- [ ] Output Documentation in `docs/output.md` is updated. -- [ ] `CHANGELOG.md` is updated. -- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml deleted file mode 100644 index ea0a032b..00000000 --- a/.github/workflows/awsfulltest.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: nf-core AWS full size tests -# This workflow is triggered on PRs opened against the master branch. -# It can be additionally triggered manually with GitHub actions workflow dispatch button. 
-# It runs the -profile 'test_full' on AWS batch - -on: - pull_request: - branches: - - master - workflow_dispatch: - pull_request_review: - types: [submitted] - -jobs: - run-platform: - name: Run AWS full tests - # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered - if: github.repository == 'nf-core/proteinfold' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest - # Do a full-scale run on each of the mode - strategy: - matrix: - mode: - [ - "alphafold2_standard", - "alphafold2_split", - "alphafold2_multimer", - "colabfold_local", - "colabfold_webserver", - "colabfold_multimer", - "esmfold", - "esmfold_multimer", - ] - steps: - - uses: octokit/request-action@v2.x - id: check_approvals - with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - id: test_variables - if: github.event_name != 'workflow_dispatch' - run: | - JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' - CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') - test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required - - name: Launch workflow via Seqera Platform - uses: seqeralabs/action-tower-launch@v2 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} - parameters: | - { - "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-${{ github.sha }}/mode_${{ matrix.mode }}" - } - profiles: test_full_${{ matrix.mode }} - - - uses: actions/upload-artifact@v4 - 
if: success() || failure() - with: - name: Seqera Platform debug log file - path: | - seqera_platform_action_*.log - seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml deleted file mode 100644 index ee725793..00000000 --- a/.github/workflows/awstest.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: nf-core AWS test -# This workflow can be triggered manually with the GitHub actions workflow dispatch button. -# It runs the -profile 'test' on AWS batch - -on: - workflow_dispatch: -jobs: - run-platform: - name: Run AWS tests - if: github.repository == 'nf-core/proteinfold' - runs-on: ubuntu-latest - steps: - # Launch workflow using Seqera Platform CLI tool action - - name: Launch workflow via Seqera Platform - uses: seqeralabs/action-tower-launch@v2 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-test-${{ github.sha }}" - } - profiles: test - - - uses: actions/upload-artifact@v4 - with: - name: Seqera Platform debug log file - path: | - seqera_platform_action_*.log - seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml deleted file mode 100644 index cabcdbdd..00000000 --- a/.github/workflows/branch.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: nf-core branch protection -# This workflow is triggered on PRs to master branch on the repository -# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` -on: - pull_request_target: - branches: [master] - -jobs: - test: - runs-on: ubuntu-latest - steps: - # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - - name: Check PRs - if: 
github.repository == 'nf-core/proteinfold' - run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - - # If the above check failed, post a comment on the PR explaining the failure - # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 - with: - message: | - ## This PR is against the `master` branch :x: - - * Do not close this PR - * Click _Edit_ and change the `base` to `dev` - * This CI test will remain failed until you push a new commit - - --- - - Hi @${{ github.event.pull_request.user.login }}, - - It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. - - You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. - Note that even after this, the test will continue to show as failing until you push a new commit. - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 161ca5e8..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,104 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - workflow_dispatch: - -env: - NXF_ANSI_LOG: false - NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity - NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.profile }} | ${{ matrix.test_profile }})" - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "24.04.2" - - "latest-everything" - profile: - - "conda" - - "docker" - - "singularity" - test_name: - - "test" - test_profile: - - "test" - - "test_alphafold2_split" - - "test_alphafold2_download" - - "test_colabfold_local" - - "test_colabfold_webserver" - - "test_colabfold_download" - - "test_esmfold" - isMaster: - - ${{ github.base_ref == 'master' }} - # Exclude conda and singularity on dev - exclude: - - isMaster: false - profile: "conda" - - isMaster: false - profile: "singularity" - steps: - - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up Apptainer - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main - - - name: Set up 
Singularity - if: matrix.profile == 'singularity' - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Set up Miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 - with: - miniconda-version: "latest" - auto-update-conda: true - conda-solver: libmamba - channels: conda-forge,bioconda - - - name: Set up Conda - if: matrix.profile == 'conda' - run: | - echo $(realpath $CONDA)/condabin >> $GITHUB_PATH - echo $(realpath python) >> $GITHUB_PATH - - - name: Clean up Disk space - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: Run pipeline with test data (docker) - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_profile }},docker --outdir ./results - - - name: Run pipeline with test data (singularity) - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_profile }},singularity --outdir ./results - if: "${{ github.base_ref == 'master' }}" - - # ## Warning: Pipeline can not be run with conda - # - name: Run pipeline with test data (conda) - # run: | - # nextflow run ${GITHUB_WORKSPACE} -profile test,conda --outdir ./results - # if: "${{ github.base_ref == 'master' }}" diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml deleted file mode 100644 index 0b6b1f27..00000000 --- a/.github/workflows/clean-up.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: "Close user-tagged issues and PRs" -on: - schedule: - - cron: "0 0 * * 0" # Once a week - -jobs: - clean-up: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 - with: - stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." 
- stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." - close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." - days-before-stale: 30 - days-before-close: 20 - days-before-pr-close: -1 - any-of-labels: "awaiting-changes,awaiting-feedback" - exempt-issue-labels: "WIP" - exempt-pr-labels: "WIP" - repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml deleted file mode 100644 index 51f84a59..00000000 --- a/.github/workflows/download_pipeline.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: Test successful pipeline download with 'nf-core pipelines download' - -# Run the workflow when: -# - dispatched manually -# - when a PR is opened or reopened to master branch -# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. -on: - workflow_dispatch: - inputs: - testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." 
- required: true - default: "dev" - pull_request: - types: - - opened - - edited - - synchronize - branches: - - master - pull_request_target: - branches: - - master - -env: - NXF_ANSI_LOG: false - -jobs: - download: - runs-on: ubuntu-latest - steps: - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - architecture: "x64" - - - name: Setup Apptainer - uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 - with: - apptainer-version: 1.3.4 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git - - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - - name: Make a cache directory for the container images - run: | - mkdir -p ./singularity_container_images - - - name: Download the pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./singularity_container_images - run: | - nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ - --compress "none" \ - --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ - --container-cache-utilisation 'amend' \ - --download-configuration 'yes' - - - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Count the downloaded number of container images - id: count_initial - run: | - image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) - echo "Initial container 
image count: $image_count" - echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} - - - name: Run the downloaded pipeline (stub) - id: stub_run_pipeline - continue-on-error: true - env: - NXF_SINGULARITY_CACHEDIR: ./singularity_container_images - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - - name: Run the downloaded pipeline (stub run not supported) - id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} - env: - NXF_SINGULARITY_CACHEDIR: ./singularity_container_images - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results - - - name: Count the downloaded number of container images - id: count_afterwards - run: | - image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) - echo "Post-pipeline run container image count: $image_count" - echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} - - - name: Compare container image counts - run: | - if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then - initial_count=${{ env.IMAGE_COUNT_INITIAL }} - final_count=${{ env.IMAGE_COUNT_AFTER }} - difference=$((final_count - initial_count)) - echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" - tree ./singularity_container_images - exit 1 - else - echo "The pipeline can be downloaded successfully!" 
- fi diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml deleted file mode 100644 index ddaa085a..00000000 --- a/.github/workflows/fix-linting.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: Fix linting from a comment -on: - issue_comment: - types: [created] - -jobs: - fix-linting: - # Only run if comment is on a PR with the main repo, and if it contains the magic keywords - if: > - contains(github.event.comment.html_url, '/pull/') && - contains(github.event.comment.body, '@nf-core-bot fix linting') && - github.repository == 'nf-core/proteinfold' - runs-on: ubuntu-latest - steps: - # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - with: - token: ${{ secrets.nf_core_bot_auth_token }} - - # indication that the linting is being fixed - - name: React on comment - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 - with: - comment-id: ${{ github.event.comment.id }} - reactions: eyes - - # Action runs on the issue comment, so we don't get the PR by default - # Use the gh cli to check out the PR - - name: Checkout Pull Request - run: gh pr checkout ${{ github.event.issue.number }} - env: - GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - # Install and run pre-commit - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - - - name: Install pre-commit - run: pip install pre-commit - - - name: Run pre-commit - id: pre-commit - run: pre-commit run --all-files - continue-on-error: true - - # indication that the linting has finished - - name: react if linting finished succesfully - if: steps.pre-commit.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 - with: - comment-id: ${{ github.event.comment.id }} - reactions: "+1" - - - name: Commit & push changes - id: commit-and-push - if: steps.pre-commit.outcome 
== 'failure' - run: | - git config user.email "core@nf-co.re" - git config user.name "nf-core-bot" - git config push.default upstream - git add . - git status - git commit -m "[automated] Fix code linting" - git push - - - name: react if linting errors were fixed - id: react-if-fixed - if: steps.commit-and-push.outcome == 'success' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 - with: - comment-id: ${{ github.event.comment.id }} - reactions: hooray - - - name: react if linting errors were not fixed - if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 - with: - comment-id: ${{ github.event.comment.id }} - reactions: confused - - - name: react if linting errors were not fixed - if: steps.commit-and-push.outcome == 'failure' - uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 - with: - issue-number: ${{ github.event.issue.number }} - body: | - @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. - See [CI log](https://github.com/nf-core/proteinfold/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml deleted file mode 100644 index a502573c..00000000 --- a/.github/workflows/linting.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: nf-core linting -# This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core pipelines lint` and markdown lint tests to ensure -# that the code meets the nf-core guidelines. 
-on: - push: - branches: - - dev - pull_request: - release: - types: [published] - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - - name: Set up Python 3.12 - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - - - name: Install pre-commit - run: pip install pre-commit - - - name: Run pre-commit - run: pre-commit run --all-files - - nf-core: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - architecture: "x64" - - - name: read .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.1.0 - id: read_yml - with: - config: ${{ github.workspace }}/.nf-core.yml - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - - name: Run nf-core pipelines lint - if: ${{ github.base_ref != 'master' }} - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - - name: Run nf-core pipelines lint --release - if: ${{ github.base_ref == 'master' }} - env: - GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - - name: Save PR number - if: ${{ always() }} - run: echo ${{ github.event.pull_request.number }} > PR_number.txt - - - name: Upload 
linting log file artifact - if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 - with: - name: linting-logs - path: | - lint_log.txt - lint_results.md - PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml deleted file mode 100644 index 42e519bf..00000000 --- a/.github/workflows/linting_comment.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: nf-core linting comment -# This workflow is triggered after the linting action is complete -# It posts an automated comment to the PR, even if the PR is coming from a fork - -on: - workflow_run: - workflows: ["nf-core linting"] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Download lint results - uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 - with: - workflow: linting.yml - workflow_conclusion: completed - - - name: Get PR number - id: pr_number - run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - - - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - number: ${{ steps.pr_number.outputs.pr_number }} - path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml deleted file mode 100644 index c6ba35df..00000000 --- a/.github/workflows/release-announcements.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: release-announcements -# Automatic release toot and tweet anouncements -on: - release: - types: [published] - workflow_dispatch: - -jobs: - toot: - runs-on: ubuntu-latest - steps: - - name: get topics and convert to hashtags - id: get_topics - run: | - echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT - - - 
uses: rzr/fediverse-action@master - with: - access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} - host: "mstdn.science" # custom host if not "mastodon.social" (default) - # GitHub event payload - # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release - message: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - - ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics - - send-tweet: - runs-on: ubuntu-latest - - steps: - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.10" - - name: Install dependencies - run: pip install tweepy==4.14.0 - - name: Send tweet - shell: python - run: | - import os - import tweepy - - client = tweepy.Client( - access_token=os.getenv("TWITTER_ACCESS_TOKEN"), - access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), - consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), - consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), - ) - tweet = os.getenv("TWEET") - client.create_tweet(text=tweet) - env: - TWEET: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} - TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - - bsky-post: - runs-on: ubuntu-latest - steps: - - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 - with: - post: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
- - Please see the changelog: ${{ github.event.release.html_url }} - env: - BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} - BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} - # diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml deleted file mode 100644 index e8aafe44..00000000 --- a/.github/workflows/template_version_comment.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: nf-core template version comment -# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. -# It posts a comment to the PR, even if it comes from a fork. - -on: pull_request_target - -jobs: - template_version: - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Read template version from .nf-core.yml - uses: nichmor/minimal-read-yaml@v0.0.2 - id: read_yml - with: - config: ${{ github.workspace }}/.nf-core.yml - - - name: Install nf-core - run: | - python -m pip install --upgrade pip - pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - - name: Check nf-core outdated - id: nf_core_outdated - run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} - - - name: Post nf-core template version comment - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 - if: | - contains(env.OUTPUT, 'nf-core') - with: - repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} - allow-repeats: false - message: | - > [!WARNING] - > Newer version of the nf-core template is available. - > - > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. - > Please update your pipeline to the latest version. 
- > - > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). - # From 1dd9f0355cea0cf0411c9f3e2877cab85f4fd536 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 15:06:25 +1100 Subject: [PATCH 061/135] Readding directory --- .github/.dockstore.yml | 6 + .github/CONTRIBUTING.md | 125 ++++++++++++++++++++ .github/ISSUE_TEMPLATE/bug_report.yml | 50 ++++++++ .github/ISSUE_TEMPLATE/config.yml | 7 ++ .github/ISSUE_TEMPLATE/feature_request.yml | 11 ++ .github/PULL_REQUEST_TEMPLATE.md | 26 ++++ .github/workflows/awsfulltest.yml | 51 ++++++++ .github/workflows/awstest.yml | 33 ++++++ .github/workflows/branch.yml | 44 +++++++ .github/workflows/ci.yml | 52 ++++++++ .github/workflows/clean-up.yml | 24 ++++ .github/workflows/download_pipeline.yml | 86 ++++++++++++++ .github/workflows/fix-linting.yml | 89 ++++++++++++++ .github/workflows/linting.yml | 68 +++++++++++ .github/workflows/linting_comment.yml | 28 +++++ .github/workflows/release-announcements.yml | 75 ++++++++++++ 16 files changed, 775 insertions(+) create mode 100644 .github/.dockstore.yml create mode 100644 .github/CONTRIBUTING.md create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/workflows/awsfulltest.yml create mode 100644 .github/workflows/awstest.yml create mode 100644 .github/workflows/branch.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/clean-up.yml create mode 100644 .github/workflows/download_pipeline.yml create mode 100644 .github/workflows/fix-linting.yml create mode 100644 .github/workflows/linting.yml create mode 100644 .github/workflows/linting_comment.yml 
create mode 100644 .github/workflows/release-announcements.yml diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 00000000..191fabd2 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..ad8a7f87 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,125 @@ +# nf-core/proteinfold: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving nf-core/proteinfold. + +We try to manage the required tasks for nf-core/proteinfold using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +> [!NOTE] +> If you need help using or modifying nf-core/proteinfold then the best place to ask is on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Contribution workflow + +If you'd like to write some code for nf-core/proteinfold, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [nf-core/proteinfold issues](https://github.com/nf-core/proteinfold/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteinfold repository](https://github.com/nf-core/proteinfold) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. 
Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nextflow run . -profile debug,test,docker --outdir <OUTDIR> +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. 
+These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regrettable event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from `patch` to directly resolve this particular bug. + +## Getting help + +For further information/help, please consult the [nf-core/proteinfold documentation](https://nf-co.re/proteinfold/usage) and don't hesitate to get in touch on the nf-core Slack [#proteinfold](https://nfcore.slack.com/channels/proteinfold) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/proteinfold code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel. +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. 
Add a description of the output files and, if relevant, any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! 
This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/proteinfold/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..257da826 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/proteinfold pipeline documentation](https://nf-co.re/proteinfold/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. 
slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/proteinfold _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..06ed5d1c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #proteinfold channel" + url: https://nfcore.slack.com/channels/proteinfold + about: Discussion about the nf-core/proteinfold pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..545e2122 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/proteinfold pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..8dc3e6a4 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,26 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! 
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md)? +- [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..3774758d --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,51 @@ +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can additionally be triggered manually with the GitHub Actions workflow dispatch button. 
+# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: +jobs: + run-platform: + name: Run AWS full tests + if: github.repository == 'nf-core/proteinfold' + runs-on: ubuntu-latest + # Do a full-scale run on each of the modes + strategy: + matrix: + mode: + [ + "alphafold2_standard", + "alphafold2_split", + "alphafold2_multimer", + "colabfold_local", + "colabfold_webserver", + "colabfold_multimer", + "esmfold", + "esmfold_multimer", + ] + steps: + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-${{ github.sha }}/mode_${{ matrix.mode }}" + } + profiles: test_full_${{ matrix.mode }} + + - uses: actions/upload-artifact@v4 + if: success() || failure() + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 00000000..ee725793 --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,33 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
+# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-platform: + name: Run AWS tests + if: github.repository == 'nf-core/proteinfold' + runs-on: ubuntu-latest + steps: + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/proteinfold/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/proteinfold/results-test-${{ github.sha }}" + } + profiles: test + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 00000000..cabcdbdd --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/proteinfold' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/proteinfold ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: 
mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..47ad6707 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,52 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run pipeline with test data + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + parameters: + - "test" + - "test_alphafold2_split" + - "test_alphafold2_download" + - "test_colabfold_local" + - "test_colabfold_webserver" + - "test_colabfold_download" + - "test_esmfold" + + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data ${{ matrix.parameters }} profile + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.parameters }},docker --outdir ./results_${{ matrix.parameters }} diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..0b6b1f27 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + 
runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..640ac03c --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) 
+ id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..ddaa085a --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/proteinfold' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished succesfully + 
if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/proteinfold/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000..1fcafe88 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,68 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..40acc23f --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: 
["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..03ecfcf7 --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # From 09c64fa3e18a4a7b9d99eb30e99453bb603636b4 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 15:21:01 +1100 Subject: [PATCH 062/135] modified: nextflow_schema.json --- nextflow_schema.json | 1128 ++++++++++++++++++++++-------------------- 1 file changed, 596 insertions(+), 532 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c7584d93..313997a8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -6,9 +6,9 @@ "type": "object", "$defs": { "input_output_options": { - "title": "Input/output options", + "title": "Global options", "type": "object", - "fa_icon": "fas fa-terminal", + "fa_icon": "fas fa-coins", "description": "Define where the pipeline should find input data and save output data.", "required": ["input", "outdir"], "properties": { @@ -20,7 +20,7 @@ "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -28,6 +28,496 @@ "format": "directory-path", "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" + }, + "mode": { + "type": "string", + "default": "alphafold2", + "description": "Specifies the mode in which the pipeline will be run. mode can be any combination of ['alphafold2', 'colabfold', 'esmfold'] separated by a comma (',') with no spaces.", + "fa_icon": "fas fa-cogs" + }, + "use_gpu": { + "type": "boolean", + "description": "Run on CPUs (default) or GPUs", + "fa_icon": "fas fa-microchip" + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" + } + } + }, + "alphafold2_options": { + "title": "Alphafold2 options", + "type": "object", + "fa_icon": "fas fa-dna", + "description": "Alphafold2 options.", + "properties": { + "max_template_date": { + "type": "string", + "default": "2020-05-14", + "description": "Maximum date of the PDB templates used by 'AlphaFold2' mode", + "fa_icon": "fas fa-calendar-check" + }, + "alphafold2_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the DB and PARAMS path used by 'AlphaFold2' mode", + "fa_icon": "fas fa-database" + }, + "full_dbs": { + "type": "boolean", + "default": false, + "description": "If true, uses the full version of the BFD database; otherwise, it uses its reduced version, small bfd", + "fa_icon": "fas fa-battery-full" + }, + "alphafold2_mode": { + "type": "string", + "default": "standard", + "description": "Specifies the mode in which Alphafold2 will be run", + "enum": ["standard", "split_msa_prediction"], + "fa_icon": "fas fa-exchange-alt" + }, + "alphafold2_model_preset": { + "type": "string", + "default": "monomer", + "description": "Model preset for 'AlphaFold2' mode", + "enum": ["monomer", "monomer_casp14", "monomer_ptm", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "colabfold_options": { + "title": "Colabfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Colabfold options.", + "properties": { + "colabfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS and DB path used by 'colabfold' mode", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_server": { + "type": "string", + "default": "webserver", + "description": "Specifies the MSA server used by Colabfold", + "enum": ["webserver", "local"], + "fa_icon": "fas fa-server" + }, + "colabfold_model_preset": { + "type": "string", + "default": 
"alphafold2_ptm", + "description": "Model preset for 'colabfold' mode", + "enum": [ + "auto", + "alphafold2", + "alphafold2_ptm", + "alphafold2_multimer_v1", + "alphafold2_multimer_v2", + "alphafold2_multimer_v3" + ], + "fa_icon": "fas fa-stream" + }, + "num_recycles_colabfold": { + "type": "integer", + "default": 3, + "description": "Number of recycles for Colabfold", + "fa_icon": "fas fa-recycle" + }, + "use_amber": { + "type": "boolean", + "default": true, + "description": "Use Amber minimization to refine the predicted structures", + "fa_icon": "fas fa-compress-alt" + }, + "db_load_mode": { + "type": "integer", + "default": 0, + "description": "Specify the way that MMSeqs2 will load the required databases in memory", + "fa_icon": "fas fa-download", + "enum": [0, 1, 2, 3] + }, + "host_url": { + "type": "string", + "description": "Specify your custom MMSeqs2 API server url", + "fa_icon": "fas fa-link" + }, + "use_templates": { + "type": "boolean", + "default": true, + "description": "Use PDB templates", + "fa_icon": "fas fa-paste" + }, + "create_colabfold_index": { + "type": "boolean", + "description": "Create databases indexes when running colabfold_local mode", + "fa_icon": "fas fa-bezier-curve" + } + } + }, + "esmfold_options": { + "title": "Esmfold options", + "type": "object", + "fa_icon": "fas fa-coins", + "description": "Esmfold options.", + "properties": { + "esmfold_db": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the PARAMS path used by 'esmfold' mode", + "fa_icon": "fas fa-folder-open" + }, + "num_recycles_esmfold": { + "type": "integer", + "default": 4, + "description": "Specifies the number of recycles used by Esmfold", + "fa_icon": "fas fa-server" + }, + "esmfold_model_preset": { + "type": "string", + "description": "Specifies whether is a 'monomer' or 'multimer' prediction", + "enum": ["monomer", "multimer"], + "fa_icon": "fas fa-stream" + } + } + }, + "foldseek_options": { + "title": "Foldseek options", 
+ "type": "object", + "fa_icon": "fas fa-coins", + "description": "Foldseek options.", + "properties": { + "foldseek_search": { + "type": "string", + "enum": [null, "easysearch"], + "default": null, + "description": "Specifies the mode of foldseek search.", + "fa_icon": "fas fa-search" + }, + "foldseek_db": { + "type": "string", + "description": "The ID of Foldseek databases", + "fa_icon": "fas fa-server" + }, + "foldseek_db_path": { + "type": "string", + "format": "path", + "exists": true, + "description": "Specifies the path to foldseek databases used by 'foldseek'.", + "fa_icon": "fas fa-folder-open" + }, + "foldseek_easysearch_arg": { + "type": "string", + "description": "Specifies the arguments to be passed to foldseek easysearch command", + "fa_icon": "fas fa-server" + } + } + }, + "process_skipping_options": { + "title": "Process skipping options", + "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", + "properties": { + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC.", + "fa_icon": "fas fa-fast-forward" + }, + "skip_visualisation": { + "type": "boolean", + "description": "Skip visualisation reports.", + "fa_icon": "fas fa-fast-forward" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } + }, + "alphafold2_dbs_and_parameters_link_options": { + "title": "Alphafold2 DBs and parameters links options", + "type": "object", + "fa_icon": "fas fa-database", + "description": "Parameters used to provide the links to the DBs and parameters public resources to Alphafold2.", + "properties": { + "bfd_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz", + "description": "Link to BFD database", + "fa_icon": "fas fa-link" 
+ }, + "small_bfd_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz", + "description": "Link to a reduced version of the BFD database", + "fa_icon": "fas fa-link" + }, + "alphafold2_params_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar", + "description": "Link to the Alphafold2 parameters", + "fa_icon": "fas fa-link" + }, + "mgnify_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz", + "description": "Link to the MGnify database", + "fa_icon": "fas fa-link" + }, + "pdb70_link": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz", + "description": "Link to the PDB70 database", + "fa_icon": "fas fa-link" + }, + "pdb_mmcif_link": { + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/", + "description": "Link to the PDB mmCIF database", + "fa_icon": "fas fa-link" + }, + "pdb_obsolete_link": { + "type": "string", + "default": "https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat", + "description": "Link to the PDB obsolete database", + "fa_icon": "fas fa-link" + }, + "uniref30_alphafold2_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz", + "description": "Link to the Uniclust30 database", + "fa_icon": "fas fa-link" + }, + "uniref90_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz", + "description": "Link to the UniRef90 database", + "fa_icon": "fas fa-link" + }, + "pdb_seqres_link": { + "type": "string", + "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt", + "description": "Link to the PDB SEQRES database", + "fa_icon": "fas fa-link" + 
}, + "uniprot_sprot_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz", + "description": "Link to the SwissProt UniProt database", + "fa_icon": "fas fa-link" + }, + "uniprot_trembl_link": { + "type": "string", + "default": "https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz", + "description": "Link to the TrEMBL UniProt database", + "fa_icon": "fas fa-link" + } + } + }, + "alphafold2_dbs_and_parameters_path_options": { + "title": "Alphafold2 DBs and parameters links options", + "type": "object", + "fa_icon": "fas fa-database", + "description": "Parameters used to provide the paths to the DBs and parameters for Alphafold2.", + "properties": { + "bfd_path": { + "type": "string", + "description": "Path to BFD dababase", + "fa_icon": "fas fa-folder-open" + }, + "small_bfd_path": { + "type": "string", + "description": "Path to a reduced version of the BFD database", + "fa_icon": "fas fa-folder-open" + }, + "alphafold2_params_path": { + "type": "string", + "description": "Path to the Alphafold2 parameters", + "fa_icon": "fas fa-folder-open" + }, + "mgnify_path": { + "type": "string", + "description": "Path to the MGnify database", + "fa_icon": "fas fa-folder-open" + }, + "pdb70_path": { + "type": "string", + "description": "Path to the PDB70 database", + "fa_icon": "fas fa-folder-open" + }, + "pdb_mmcif_path": { + "type": "string", + "description": "Path to the PDB mmCIF database", + "fa_icon": "fas fa-folder-open" + }, + "uniref30_alphafold2_path": { + "type": "string", + "description": "Path to the Uniref30 database", + "fa_icon": "fas fa-folder-open" + }, + "uniref90_path": { + "type": "string", + "description": "Path to the UniRef90 database", + "fa_icon": "fas fa-folder-open" + }, + "pdb_seqres_path": { + "type": "string", + "description": "Path to the PDB SEQRES database", + "fa_icon": "fas fa-folder-open" + }, + 
"uniprot_path": { + "type": "string", + "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", + "fa_icon": "fas fa-folder-open" + } + } + }, + "colabfold_dbs_and_parameters_link_options": { + "title": "Colabfold DBs and parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", + "fa_icon": "fas fa-database", + "properties": { + "colabfold_db_link": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz", + "description": "Link to the Colabfold database", + "fa_icon": "fas fa-link" + }, + "uniref30_colabfold_link": { + "type": "string", + "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz", + "description": "Link to the UniRef30 database", + "fa_icon": "fas fa-link" + }, + "colabfold_alphafold2_params_link": { + "type": "string", + "description": "Link to the Alphafold2 parameters for Colabfold", + "fa_icon": "fas fa-link" + } + } + }, + "colabfold_dbs_and_parameters_path_options": { + "title": "Colabfold DBs and parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the DBs and parameters public resources to Colabfold.", + "fa_icon": "fas fa-database", + "properties": { + "colabfold_db_path": { + "type": "string", + "description": "Link to the Colabfold database", + "fa_icon": "fas fa-folder-open" + }, + "uniref30_colabfold_path": { + "type": "string", + "description": "Link to the UniRef30 database", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_alphafold2_params_path": { + "type": "string", + "description": "Link to the Alphafold2 parameters for Colabfold", + "fa_icon": "fas fa-folder-open" + }, + "colabfold_alphafold2_params_tags": { + "type": "object", + "description": "Dictionary with Alphafold2 parameters tags", + "fa_icon": "fas fa-stream" + } + } + }, + 
"esmfold_parameters_link_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_3B_v1": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt", + "description": "Link to the Esmfold 3B-v1 model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt", + "description": "Link to the Esmfold t36-3B-UR50D model", + "fa_icon": "fas fa-link" + }, + "esm2_t36_3B_UR50D_contact_regression": { + "type": "string", + "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt", + "description": "Link to the Esmfold t36-3B-UR50D-contact-regression model", + "fa_icon": "fas fa-link" + } + } + }, + "esmfold_parameters_path_options": { + "title": "Esmfold parameters links options", + "type": "object", + "description": "Parameters used to provide the links to the parameters public resources to Esmfold.", + "fa_icon": "fas fa-database", + "properties": { + "esmfold_params_path": { + "type": "string", + "description": "Link to the Esmfold parameters", + "fa_icon": "fas fa-folder-open" } } }, @@ -44,12 +534,89 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "multiqc_config": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom config file to supply to MultiQC.", + "fa_icon": "fas fa-cog", + "hidden": true + }, + "multiqc_logo": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom logo file to supply to MultiQC. 
File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", "default": true, "fa_icon": "fas fa-check-square", "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } @@ -59,546 +626,43 @@ "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/$defs/generic_options" - } - ], - "properties": { - "mode": { - "type": "string", - "default": "alphafold2" - }, - "use_gpu": { - "type": "boolean" - }, - "alphafold2_mode": { - "type": "string", - "default": "standard" - }, - "max_template_date": { - "type": "string", - "default": "2020-05-14" - }, - "full_dbs": { - "type": "boolean" - }, - "alphafold2_model_preset": { - "type": "string", - "default": "monomer" - }, - "alphafold2_db": { - "type": "string" - }, - "bfd_prefix": { - "type": "string" - }, - "smallbfd_prefix": { - "type": "string" - }, - "mgnify_prefix": { - "type": "string" - }, - "pdb70_prefix": { - "type": "string" - }, - "pdb_mmcif_prefix": { - "type": "string" - }, - "uniref30_prefix": { - "type": "string" - }, - "uniref90_prefix": { - "type": "string" - }, - "pdb_seq_prefix": { - "type": "string" - }, - "uniprot_prefix": { - "type": "string" - }, - "alphafold_params_prefix": { - "type": "string" - }, - "mmcif_path": { - "type": "string" - }, - "mmcif_obsolete": { - "type": "string" - }, - "uniref30_db": { - "type": "string" - }, - 
"bfd_first_non_consensus_sequences": { - "type": "string" - }, - "uniprot_fasta": { - "type": "string" - }, - "pdb_seqres_txt": { - "type": "string" - }, - "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt": { - "type": "string" - }, - "uniref90_fasta": { - "type": "string" - }, - "mgy_clusters_fasta": { - "type": "string" - }, - "bfd_name": { - "type": "string", - "default": "bfd" - }, - "smallbfd_name": { - "type": "string", - "default": "smallbfd" - }, - "mgnify_name": { - "type": "string", - "default": "mgnify" - }, - "pdb70_name": { - "type": "string", - "default": "pdb70" - }, - "pdb_mmcif_name": { - "type": "string", - "default": "pdb_mmcif" - }, - "uniref30_name": { - "type": "string", - "default": "uniref30" - }, - "uniref90_name": { - "type": "string", - "default": "uniref90" - }, - "pdb_seqres_name": { - "type": "string", - "default": "pdb_seqres" - }, - "uniprot_name": { - "type": "string", - "default": "uniprot" - }, - "alphafold_params_name": { - "type": "string", - "default": "params/alphafold_params_*" - }, - "mmcif_files_name": { - "type": "string", - "default": "pdb_mmcif/mmcif_files/" - }, - "mmcif_obsolete_name": { - "type": "string", - "default": "pdb_mmcif/obsolete.dat" - }, - "uniref30_db_name": { - "type": "string", - "default": "uniref30_2018_08" - }, - "bfd_first_non_consensus_sequences_name": { - "type": "string", - "default": "bfd-first_non_consensus_sequences.fasta" - }, - "uniprot_fasta_name": { - "type": "string", - "default": "uniprot.fasta" - }, - "pdb_seqres_txt_name": { - "type": "string", - "default": "pdb_seqres.txt" - }, - "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name": { - "type": "string", - "default": "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" - }, - "uniref90_fasta_name": { - "type": "string", - "default": "uniref90.fasta" - }, - "mgy_clusters_fasta_name": { - "type": "string", - "default": "mgy_clusters_2022_05.fa" - }, - "bfd_link": { - "type": "string" - }, - "small_bfd_link": { - 
"type": "string" - }, - "alphafold2_params_link": { - "type": "string" - }, - "mgnify_link": { - "type": "string" - }, - "pdb70_link": { - "type": "string" - }, - "pdb_mmcif_link": { - "type": "string" - }, - "pdb_obsolete_link": { - "type": "string" - }, - "uniref30_alphafold2_link": { - "type": "string" - }, - "uniref90_link": { - "type": "string" - }, - "pdb_seqres_link": { - "type": "string" + "$ref": "#/$defs/alphafold2_options" }, - "uniprot_sprot_link": { - "type": "string" - }, - "uniprot_trembl_link": { - "type": "string" - }, - "bfd_path": { - "type": "string", - "default": "null/bfd/*" - }, - "small_bfd_path": { - "type": "string", - "default": "null/smallbfd/*" - }, - "alphafold2_params_path": { - "type": "string", - "default": "null/params/alphafold_params_*/*" - }, - "mgnify_path": { - "type": "string", - "default": "null/mgnify/*" - }, - "pdb70_path": { - "type": "string", - "default": "null/pdb70/**" - }, - "pdb_mmcif_path": { - "type": "string", - "default": "null/pdb_mmcif/**" - }, - "uniref30_alphafold2_path": { - "type": "string", - "default": "null/uniref30/**" - }, - "uniref90_path": { - "type": "string", - "default": "null/uniref90/*" - }, - "pdb_seqres_path": { - "type": "string", - "default": "null/pdb_seqres/*" - }, - "uniprot_path": { - "type": "string", - "default": "null/uniprot/*" - }, - "colabfold_server": { - "type": "string", - "default": "webserver" - }, - "colabfold_model_preset": { - "type": "string", - "default": "alphafold2_ptm" - }, - "num_recycles_colabfold": { - "type": "integer", - "default": 3 - }, - "use_amber": { - "type": "boolean", - "default": true - }, - "colabfold_db": { - "type": "string" - }, - "db_load_mode": { - "type": "integer", - "default": 0 - }, - "host_url": { - "type": "string" - }, - "use_templates": { - "type": "boolean", - "default": true - }, - "create_colabfold_index": { - "type": "boolean" - }, - "colabfold_db_link": { - "type": "string", - "default": 
"http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz" - }, - "uniref30_colabfold_link": { - "type": "string", - "default": "https://wwwuser.gwdg.de/~compbiol/colabfold/uniref30_2302.tar.gz" - }, - "colabfold_db_path": { - "type": "string", - "default": "null/colabfold_envdb_202108" - }, - "uniref30_colabfold_path": { - "type": "string", - "default": "null/uniref30_2302" - }, - "esmfold_db": { - "type": "string" - }, - "esmfold_model_preset": { - "type": "string", - "default": "monomer" - }, - "num_recycles_esmfold": { - "type": "integer", - "default": 4 - }, - "esmfold_3B_v1": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt" - }, - "esm2_t36_3B_UR50D": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt" - }, - "esm2_t36_3B_UR50D_contact_regression": { - "type": "string", - "default": "https://dl.fbaipublicfiles.com/fair-esm/regression/esm2_t36_3B_UR50D-contact-regression.pt" - }, - "esmfold_params_path": { - "type": "string", - "default": "null/*" - }, - "rosettafold_all_atom_db": { - "type": "string" - }, - "uniref30_rosettafold_all_atom_path": { - "type": "string" - }, - "blast_path": { - "type": "string", - "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" - }, - "pdb100_path": { - "type": "string", - "default": "null/pdb100/" - }, - "RFAA_paper_weights_path": { - "type": "string" - }, - "foldseek_search": { - "type": "string" - }, - "foldseek_easysearch_arg": { - "type": "string" - }, - "skip_multiqc": { - "type": "boolean" - }, - "skip_visualisation": { - "type": "boolean" - }, - "multiqc_config": { - "type": "string" - }, - "multiqc_title": { - "type": "string" - }, - "multiqc_logo": { - "type": "string" - }, - "max_multiqc_email_size": { - "type": "string", - "default": "25.MB" - }, - "multiqc_methods_description": { - "type": "string" - }, - "publish_dir_mode": { - "type": "string", - "default": "copy" - }, - "email": { 
- "type": "string" - }, - "email_on_fail": { - "type": "string" - }, - "plaintext_email": { - "type": "boolean" - }, - "monochrome_logs": { - "type": "boolean" - }, - "hook_url": { - "type": "string" - }, - "pipelines_testdata_base_path": { - "type": "string", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/" - }, - "config_profile_name": { - "type": "string" - }, - "config_profile_description": { - "type": "string" - }, - "custom_config_version": { - "type": "string", - "default": "master" - }, - "custom_config_base": { - "type": "string", - "default": "https://raw.githubusercontent.com/nf-core/configs/master" - }, - "config_profile_contact": { - "type": "string" - }, - "config_profile_url": { - "type": "string" - }, - "cpuQueue": { - "type": "string", - "default": "submission" - }, - "gpuQueue": { - "type": "string", - "default": "mwacgpu2" - }, - "bfd": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" - }, - "small_bfd": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" - }, - "alphafold2_params": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" - }, - "mgnify": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" - }, - "pdb70": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" - }, - "pdb_mmcif": { - "type": "string", - "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" - }, - "pdb_obsolete": { - "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" - }, - "uniref30": { - "type": "string", - "default": 
"http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" - }, - "uniref90": { - "type": "string", - "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" - }, - "pdb_seqres": { - "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" - }, - "uniprot_sprot": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" - }, - "uniprot_trembl": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" - }, - "bfd_dir_path": { - "type": "string", - "default": "null/bfd/" - }, - "small_bfd_dir_path": { - "type": "string", - "default": "null/smallbfd/" - }, - "mgnify_dir_path": { - "type": "string", - "default": "null/mgnify/" - }, - "pdb70_dir_path": { - "type": "string", - "default": "null/pdb70/" - }, - "pdb_mmcif_dir_path": { - "type": "string", - "default": "null/pdb_mmcif/" - }, - "uniref30_dir_path": { - "type": "string", - "default": "null/uniref30/" - }, - "uniref90_dir_path": { - "type": "string", - "default": "null/uniref90/" - }, - "pdb_seqres_dir_path": { - "type": "string", - "default": "null/pdb_seqres/" - }, - "uniprot_dir_path": { - "type": "string", - "default": "null/uniprot/" - }, - "mgnify_database_path": { - "type": "string", - "default": "null/mgnify/" - }, - "template_mmcif_dir": { - "type": "string", - "default": "null/pdb_mmcif/mmcif_files//" + { + "$ref": "#/$defs/colabfold_options" }, - "obsolete_pdbs_path": { - "type": "string", - "default": "null/pdb_mmcif/obsolete.dat" + { + "$ref": "#/$defs/esmfold_options" }, - "colabfold_alphafold2_params_tags": { - "type": "string", - "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', 
alphafold2_ptm:'alphafold_params_2021-07-14']" + { + "$ref": "#/$defs/foldseek_options" }, - "pdb100": { - "type": "string", - "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" + { + "$ref": "#/$defs/process_skipping_options" }, - "RFAA_paper_weights": { - "type": "string", - "default": "http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt" + { + "$ref": "#/$defs/institutional_config_options" }, - "uniref30_variable": { - "type": "string", - "default": "null/uniref30/" + { + "$ref": "#/$defs/alphafold2_dbs_and_parameters_link_options" }, - "bfd_variable": { - "type": "string", - "default": "null/bfd/" + { + "$ref": "#/$defs/alphafold2_dbs_and_parameters_path_options" }, - "RFAA_paper_weights_variable": { - "type": "string" + { + "$ref": "#/$defs/colabfold_dbs_and_parameters_link_options" }, - "foldseek_db": { - "type": "string" + { + "$ref": "#/$defs/colabfold_dbs_and_parameters_path_options" }, - "foldseek_db_path": { - "type": "string" + { + "$ref": "#/$defs/esmfold_parameters_link_options" }, - "colabfold_alphafold2_params_link": { - "type": "string" + { + "$ref": "#/$defs/esmfold_parameters_path_options" }, - "colabfold_alphafold2_params_path": { - "type": "string" + { + "$ref": "#/$defs/generic_options" } - } + ] } From 3dd93671fdced394cdf446a78efc08b05b17f6dd Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 22 Oct 2024 15:33:30 +1100 Subject: [PATCH 063/135] Removed deprecated "check_max" --- conf/katana.config | 36 ++--- nextflow_schema.json | 319 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 322 insertions(+), 33 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 67f310a7..fcfe018e 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -15,13 +15,13 @@ params { process { // TODO nf-core: Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 
'time' ) } + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } //executor = 'pbspro' - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -33,30 +33,30 @@ process { // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/nextflow_schema.json b/nextflow_schema.json index 
313997a8..1895746a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -189,7 +189,8 @@ "type": "string", "description": "Specifies whether is a 'monomer' or 'multimer' prediction", "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "monomer" } } }, @@ -385,47 +386,56 @@ "small_bfd_path": { "type": "string", "description": "Path to a reduced version of the BFD database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/smallbfd/*" }, "alphafold2_params_path": { "type": "string", "description": "Path to the Alphafold2 parameters", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/params/alphafold_params_*/*" }, "mgnify_path": { "type": "string", "description": "Path to the MGnify database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/mgnify/*" }, "pdb70_path": { "type": "string", "description": "Path to the PDB70 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb70/**" }, "pdb_mmcif_path": { "type": "string", "description": "Path to the PDB mmCIF database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb_mmcif/**" }, "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30/**" }, "uniref90_path": { "type": "string", "description": "Path to the UniRef90 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref90/*" }, "pdb_seqres_path": { "type": "string", "description": "Path to the PDB SEQRES database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb_seqres/*" }, "uniprot_path": { "type": "string", "description": "Path to UniProt database containing the SwissProt 
and the TrEMBL databases", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniprot/*" } } }, @@ -463,12 +473,14 @@ "colabfold_db_path": { "type": "string", "description": "Link to the Colabfold database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/colabfold_envdb_202108" }, "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30_2302" }, "colabfold_alphafold2_params_path": { "type": "string", @@ -478,7 +490,8 @@ "colabfold_alphafold2_params_tags": { "type": "object", "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" } } }, @@ -517,7 +530,8 @@ "esmfold_params_path": { "type": "string", "description": "Link to the Esmfold parameters", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/*" } } }, @@ -664,5 +678,280 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "bfd_prefix": { + "type": "string" + }, + "smallbfd_prefix": { + "type": "string" + }, + "mgnify_prefix": { + "type": "string" + }, + "pdb70_prefix": { + "type": "string" + }, + "pdb_mmcif_prefix": { + "type": "string" + }, + "uniref30_prefix": { + "type": "string" + }, + "uniref90_prefix": { + "type": "string" + }, + "pdb_seq_prefix": { + "type": "string" + }, + "uniprot_prefix": { + "type": "string" + }, + "alphafold_params_prefix": { + "type": "string" + }, + "mmcif_path": { + "type": "string" + }, + "mmcif_obsolete": { + "type": "string" + }, + "uniref30_db": { + "type": "string" + }, + "bfd_first_non_consensus_sequences": { + 
"type": "string" + }, + "uniprot_fasta": { + "type": "string" + }, + "pdb_seqres_txt": { + "type": "string" + }, + "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt": { + "type": "string" + }, + "uniref90_fasta": { + "type": "string" + }, + "mgy_clusters_fasta": { + "type": "string" + }, + "bfd_name": { + "type": "string", + "default": "bfd" + }, + "smallbfd_name": { + "type": "string", + "default": "smallbfd" + }, + "mgnify_name": { + "type": "string", + "default": "mgnify" + }, + "pdb70_name": { + "type": "string", + "default": "pdb70" + }, + "pdb_mmcif_name": { + "type": "string", + "default": "pdb_mmcif" + }, + "uniref30_name": { + "type": "string", + "default": "uniref30" + }, + "uniref90_name": { + "type": "string", + "default": "uniref90" + }, + "pdb_seqres_name": { + "type": "string", + "default": "pdb_seqres" + }, + "uniprot_name": { + "type": "string", + "default": "uniprot" + }, + "alphafold_params_name": { + "type": "string", + "default": "params/alphafold_params_*" + }, + "mmcif_files_name": { + "type": "string", + "default": "pdb_mmcif/mmcif_files/" + }, + "mmcif_obsolete_name": { + "type": "string", + "default": "pdb_mmcif/obsolete.dat" + }, + "uniref30_db_name": { + "type": "string", + "default": "uniref30_2018_08" + }, + "bfd_first_non_consensus_sequences_name": { + "type": "string", + "default": "bfd-first_non_consensus_sequences.fasta" + }, + "uniprot_fasta_name": { + "type": "string", + "default": "uniprot.fasta" + }, + "pdb_seqres_txt_name": { + "type": "string", + "default": "pdb_seqres.txt" + }, + "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name": { + "type": "string", + "default": "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" + }, + "uniref90_fasta_name": { + "type": "string", + "default": "uniref90.fasta" + }, + "mgy_clusters_fasta_name": { + "type": "string", + "default": "mgy_clusters_2022_05.fa" + }, + "rosettafold_all_atom_db": { + "type": "string" + }, + "uniref30_rosettafold_all_atom_path": { + 
"type": "string" + }, + "blast_path": { + "type": "string", + "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" + }, + "pdb100_path": { + "type": "string", + "default": "null/pdb100/" + }, + "RFAA_paper_weights_path": { + "type": "string" + }, + "cpuQueue": { + "type": "string", + "default": "submission" + }, + "gpuQueue": { + "type": "string", + "default": "mwacgpu2" + }, + "bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" + }, + "small_bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" + }, + "alphafold2_params": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" + }, + "mgnify": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" + }, + "pdb70": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" + }, + "pdb_mmcif": { + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" + }, + "pdb_obsolete": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" + }, + "uniref30": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" + }, + "uniref90": { + "type": "string", + "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" + }, + "pdb_seqres": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" + }, + "uniprot_sprot": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" + }, + "uniprot_trembl": { + "type": "string", 
+ "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" + }, + "bfd_dir_path": { + "type": "string", + "default": "null/bfd/" + }, + "small_bfd_dir_path": { + "type": "string", + "default": "null/smallbfd/" + }, + "mgnify_dir_path": { + "type": "string", + "default": "null/mgnify/" + }, + "pdb70_dir_path": { + "type": "string", + "default": "null/pdb70/" + }, + "pdb_mmcif_dir_path": { + "type": "string", + "default": "null/pdb_mmcif/" + }, + "uniref30_dir_path": { + "type": "string", + "default": "null/uniref30/" + }, + "uniref90_dir_path": { + "type": "string", + "default": "null/uniref90/" + }, + "pdb_seqres_dir_path": { + "type": "string", + "default": "null/pdb_seqres/" + }, + "uniprot_dir_path": { + "type": "string", + "default": "null/uniprot/" + }, + "mgnify_database_path": { + "type": "string", + "default": "null/mgnify/" + }, + "template_mmcif_dir": { + "type": "string", + "default": "null/pdb_mmcif/mmcif_files//" + }, + "obsolete_pdbs_path": { + "type": "string", + "default": "null/pdb_mmcif/obsolete.dat" + }, + "pdb100": { + "type": "string", + "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" + }, + "RFAA_paper_weights": { + "type": "string", + "default": "http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt" + }, + "uniref30_variable": { + "type": "string", + "default": "null/uniref30/" + }, + "bfd_variable": { + "type": "string", + "default": "null/bfd/" + }, + "RFAA_paper_weights_variable": { + "type": "string" + } + } } From 0fb97351f3d476caf84705fc6d86facae43bc1db Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 23 Oct 2024 12:28:30 +1100 Subject: [PATCH 064/135] Aligning input channels for RFAA --- conf/dbs.config | 9 +-- main.nf | 16 ++--- .../local/.run_rosettafold_all_atom.nf.swp | Bin 0 -> 12288 bytes .../local/prepare_rosettafold_all_atom_dbs.nf | 54 +++------------ workflows/.rosettafold_all_atom.nf.swp | Bin 0 -> 16384 bytes 
workflows/rosettafold_all_atom.nf | 64 +++++++++--------- 6 files changed, 49 insertions(+), 94 deletions(-) create mode 100644 modules/local/.run_rosettafold_all_atom.nf.swp create mode 100644 workflows/.rosettafold_all_atom.nf.swp diff --git a/conf/dbs.config b/conf/dbs.config index b328e8f3..4a3becfc 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -89,14 +89,11 @@ params { // RoseTTAFold links uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' - RFAA_paper_weights = 'http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt' // RoseTTAFold paths - uniref30_variable = "${params.rosettafold_all_atom_db}/uniref30/" - pdb100_path = "${params.rosettafold_all_atom_db}/pdb100/" - bfd_variable = "${params.rosettafold_all_atom_db}/bfd/" - RFAA_paper_weights_variable = "" - blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" + uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/uniref30/UniRef30_2021_03" + pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/pdb100_2021Mar03" + blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/main.nf b/main.nf index 6bf7c353..269e1c06 100644 --- a/main.nf +++ b/main.nf @@ -26,7 +26,8 @@ if (params.mode.toLowerCase().split(",").contains("colabfold")) { if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' -} else if (params.mode == "rosettafold_all_atom") { +} +if (params.mode == "rosettafold_all_atom") { include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' } @@ -208,7 
+209,7 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run rosettafold_all_atom // - else if(params.mode == "rosettafold_all_atom") { + if(params.mode == "rosettafold_all_atom") { // // SUBWORKFLOW: Prepare Rosttafold-all-atom DBs // @@ -217,12 +218,6 @@ workflow NFCORE_PROTEINFOLD { params.uniref30_rosettafold_all_atom_path, params.pdb100_path, params.blast_path, - params.RFAA_paper_weights_path, - params.bfd_link, - params.uniref30_rosettafold_all_atom_link, - params.pdb100_link, - params.blast_link, - params.RFAA_paper_weights_link ) ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) @@ -233,11 +228,10 @@ workflow NFCORE_PROTEINFOLD { ch_versions, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30_rosettafold_all_atom, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.RFAA_paper_weights ) - ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report +// ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) } diff --git a/modules/local/.run_rosettafold_all_atom.nf.swp b/modules/local/.run_rosettafold_all_atom.nf.swp new file mode 100644 index 0000000000000000000000000000000000000000..d0cf306e72eda729772405a774e8858dcae18ffd GIT binary patch literal 12288 zcmeI2&2QsG7>8%Mm1UO-gt)$Iijq``oo)nFrCE`s1uAvhsvj#PTB3A3+z24`9zs$ z^(+2D`Mvr)?;do1+JH8o4QK<}fHt5FXam}SHlPh?1KPlOHozUmzMEp~^Cyud{{LV7 z{{Q=F#(oFCf^WcQ;6v~MxDNij%vb=1zy)2f2B^YCq_TGWt%eH ze_WjTz)i2QGbrX~6cSRI4Hk+qjk0NSaw?W^#D{Veg-T6v+BvgxCS^lK!O8GHYsd#> zQ=8bhyS?3eBj32&!^YiSzHvA8Try^q7lTc;S|Z#hUuKON85Ls|yq1Rr_S@z;*r|RyExd=3|BK3p{NsLU|lZ?u8;D>_>(=Ou2QPtRR#Y zcds_{P0g5Oo^-Vg*y8NPvRoTgOX9XP4@M%SZkNK$_v!eU?zy1u&m+pa^5n^wtS+NT zBJkwGY00Tuin!UTvQC47%$MFR`z)q@e?86`MZrgzj5AD^aHu~ri+w@++^+{h@%a$&GoJ2yja#(m-@Zc 
z)qZb#{RS1A!O4k4N?{}^&EhypB)uMm4mWAT7kIKL151%T#`;*a<%Pq{=ZPl|@ogh= z0v?hVrV{S}vrHEySU-r8vFk?%`9tn{zA$Ap7NK`X)N9RJ!}LOlw}-H0{hb=}b9k*u zwSWf$UfXM+h!NMIYh9Xa(2G_*5(3J^es9UKZ5U7?Kb1?BpwAdXB-$kcaET%w{b>n<(1{Ee$bimfZ!ZDeLF} literal 0 HcmV?d00001 diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf index 36f02e9d..84e01a67 100644 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -1,61 +1,27 @@ // -// Download all the required Rosettafold-All-Atom databases and parameters +// TBD: Download all the required Rosettafold-All-Atom databases and parameters // -include { - ARIA2_UNCOMPRESS as ARIA2_BFD - ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD - ARIA2_UNCOMPRESS as ARIA2_UNIREF30} from './aria2_uncompress' workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { take: - rosettafold_all_atom_db // directory: path to rosettafold_all_atom DBs bfd_path // directory: /path/to/bfd/ - small_bfd_path // directory: /path/to/small_bfd/ uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ - bfd_link // string: Specifies the link to download bfd - small_bfd_link // string: Specifies the link to download small_bfd - uniref30_rosettafold_all_atom_link // string: Specifies the link to download uniref30_rosettafold_all_atom + blast_path + pdb100_path main: - ch_bfd = Channel.empty() - ch_small_bfd = Channel.empty() - ch_versions = Channel.empty() - - - if (rosettafold_all_atom_db) { - ch_bfd = Channel.value(file(bfd_path)) - ch_small_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) - ch_bfd = Channel.value(file("${projectDir}/assets/dummy_db")) - ch_small_bfd = Channel.value(file(small_bfd_path)) - ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path, type: 'any')) - } - else { - if (full_dbs) { - ARIA2_BFD( - bfd_link - ) - ch_bfd = ARIA2_BFD.out.db - ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) 
- } else { - ARIA2_SMALL_BFD( - small_bfd_link - ) - ch_small_bfd = ARIA2_SMALL_BFD.out.db - ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) - } - - ARIA2_UNIREF30( - uniref30_rosettafold_all_atom_link - ) - ch_uniref30 = ARIA2_UNIREF30.out.db - ch_versions = ch_versions.mix(ARIA2_UNIREF30.out.versions) - } + ch_versions = Channel.empty() + ch_bfd = Channel.value(file(bfd_path)) + ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path, type: 'any')) + ch_blast = Channel.value(file(blast_path, type: 'string')) + ch_pdb100 = Channel.value(file(pdb100_path)) emit: bfd = ch_bfd - small_bfd = ch_small_bfd uniref30 = ch_uniref30 + pdb100 = ch_pdb100 + blast = ch_blast versions = ch_versions } diff --git a/workflows/.rosettafold_all_atom.nf.swp b/workflows/.rosettafold_all_atom.nf.swp new file mode 100644 index 0000000000000000000000000000000000000000..3c05258597046fca78e3557aef04334eb5964854 GIT binary patch literal 16384 zcmeHNTWl0n7(R-isEENJ`l5$|bc`gL|@{>wSv z`OiOdF5_JX2L|cRVi&=2Hz99U@0(b?@23y8JhzLy^l0~vo!#A!u}4cCQ(CCGTK7^S z@iQgxr+eUiNy0I35@|I*kG9Vd{3`hnf1CjyBfMh^2@Ly#h ztlvzIqjER%3LfI;n{wwB{CWdFKa+dTFXc%xAQ_MhNCqSWk^#wpWI!??8ITM}1|$QL zfqx+bnoh_&DA)aL0s!m(V*Gz)Eg|m$$AE``Hee(00PyEsg!~S?4ZH=s0qh3W183F{ zavJy;I0d{690z)V9$+u98dwERJ30BqnzU@y=PYylnwzPpW(Z-LK%_kfc?4cGwO3#FOsslnd9J8~IPa#y$h2lhM!`u`dK(&}#?g>fg^|VTSBnB{JyE$}*7jn> zU0egq)iqypz!$gP!YtiGO-9$%B|Fg$Mr~@q>^o`&VNjx>9~m9Al<2o@#?G3Iv}|v}Yxokb(&pih_oPu{ z`ku323nvTthKwaUV5*eLHo{bRSQu6{&o!-yw2rA`e*YRAb4k|hBD9xwoOt9#YP4Ef zvzBoqIzy~{&5t84Wj)1XG`k-}X)#fU~4e!-O$Y zfkocM<01*wgS@1p-qYp53QJw0>>aTjDoSyHh@PZ!RC9G2XwwEfB;Kje5DM`*V~H7v zv%V0F6aMhnkUCr)=^Gs__g5>uYPnKT%cIr7B8Vso$)pg?EeBgm>hib|Zx5t;R)FL` z`vq$(MRpQ)v6v-Vh#xnqKQgxOP<8mp{z~;wi4I3D9nS1AEMt#h8Ht_-*vqJEzD^xa zH-v@w3}3VnDXak5;7I!q4+hJvM`2QA-Y?F^v7*22*pL@TE1a+ zcQK{(K(TpAODGA_ppZ&idEC@f>Ga0}HAtlzB2ZFk_9LtDY~o7DvEI=^pC?Q#)TRv7 zM!XMbt7t^_AX^o3Y?+R@9Ed?WSRNRnBEXi%dOdwZXji~{(hnJNg%Zsi%JlU 
z3wi>@_G+4~M4@E|s%tLDMj~;wXFP};aj+@PEb75Ew=uIhA3Pyu?IztsywsET3tDus zl3%A?tqY2AZU=AOEmaegn#CE(A@JE{xL=TcbzN##Df*Nj!zF^N%&!nOp z$PN*D8Pm*=H8@ro9XQaFdISGlka>)W;kiv=9Z@(IZ>Rwm*#{rYI>mbgUv6+2Cxlct z<8--0dSGyWb$FBx^>6E`4)^hw=!lZxXyir(`Jx%Vsbw=bKuG(rFd7kx(r?Cth zGc8&xu@JZJ4I;Wy?I~AoJk4zUzXId$Ie?A-#rpm$80YT-{=%64GH?Mn4V(a80CoTm z1HWP%e+l>jI1juA>;u?1e-E$=xB%N<0mrzX*O@PQNCqSWk^#wpWI!??8ITM}1|$Rj zCkB>#U^V(gAMG3JUEWb@GCeHjrs4rlG(M$0p34;aDN&$P(l8{LCI?W(?EWbUuL*&)_CEi=kJ+JV?CU#=l8X)i literal 0 HcmV?d00001 diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 76704f99..69de0837 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -39,8 +39,9 @@ workflow ROSETTAFOLD_ALL_ATOM { take: ch_versions // channel: [ path(versions.yml) ] ch_bfd // channel: path(bfd) - ch_small_bfd // channel: path(small_bfd) ch_uniref30 // channel: path(uniref30) + ch_blast + ch_pdb100 main: ch_multiqc_files = Channel.empty() @@ -57,9 +58,6 @@ workflow ROSETTAFOLD_ALL_ATOM { // RUN_ROSETTAFOLD_ALL_ATOM ( ch_file, - ch_bfd, - ch_small_bfd, - ch_uniref30, ) ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) @@ -71,37 +69,37 @@ workflow ROSETTAFOLD_ALL_ATOM { .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) .set { ch_collated_versions } - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - if (!params.skip_multiqc) { - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - ch_multiqc_report = MULTIQC.out.report.toList() - } +// // +// // MODULE: MultiQC +// // +// ch_multiqc_report = Channel.empty() +// if (!params.skip_multiqc) { +// ch_multiqc_report = Channel.empty() +// ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +// ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() +// ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() +// summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") +// ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) +// ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +// ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) +// +// ch_multiqc_files = Channel.empty() +// ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) +// ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) +// ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) +// ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) +// +// MULTIQC ( +// ch_multiqc_files.collect(), +// ch_multiqc_config.toList(), +// ch_multiqc_custom_config.toList(), +// ch_multiqc_logo.toList() +// ) +// ch_multiqc_report = MULTIQC.out.report.toList() +// } emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html +// multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 125a702e9109187a4855da6862b93916401c3e49 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 23 Oct 2024 12:28:51 +1100 Subject: [PATCH 065/135] Aligning input channels for RFAA --- modules/local/.run_rosettafold_all_atom.nf.swp | Bin 12288 -> 0 bytes modules/local/run_rosettafold_all_atom.nf | 2 +- workflows/.rosettafold_all_atom.nf.swp | Bin 16384 -> 0 bytes 3 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 modules/local/.run_rosettafold_all_atom.nf.swp delete mode 100644 workflows/.rosettafold_all_atom.nf.swp diff --git a/modules/local/.run_rosettafold_all_atom.nf.swp b/modules/local/.run_rosettafold_all_atom.nf.swp deleted file mode 100644 index d0cf306e72eda729772405a774e8858dcae18ffd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&2QsG7>8%Mm1UO-gt)$Iijq``oo)nFrCE`s1uAvhsvj#PTB3A3+z24`9zs$ 
z^(+2D`Mvr)?;do1+JH8o4QK<}fHt5FXam}SHlPh?1KPlOHozUmzMEp~^Cyud{{LV7 z{{Q=F#(oFCf^WcQ;6v~MxDNij%vb=1zy)2f2B^YCq_TGWt%eH ze_WjTz)i2QGbrX~6cSRI4Hk+qjk0NSaw?W^#D{Veg-T6v+BvgxCS^lK!O8GHYsd#> zQ=8bhyS?3eBj32&!^YiSzHvA8Try^q7lTc;S|Z#hUuKON85Ls|yq1Rr_S@z;*r|RyExd=3|BK3p{NsLU|lZ?u8;D>_>(=Ou2QPtRR#Y zcds_{P0g5Oo^-Vg*y8NPvRoTgOX9XP4@M%SZkNK$_v!eU?zy1u&m+pa^5n^wtS+NT zBJkwGY00Tuin!UTvQC47%$MFR`z)q@e?86`MZrgzj5AD^aHu~ri+w@++^+{h@%a$&GoJ2yja#(m-@Zc z)qZb#{RS1A!O4k4N?{}^&EhypB)uMm4mWAT7kIKL151%T#`;*a<%Pq{=ZPl|@ogh= z0v?hVrV{S}vrHEySU-r8vFk?%`9tn{zA$Ap7NK`X)N9RJ!}LOlw}-H0{hb=}b9k*u zwSWf$UfXM+h!NMIYh9Xa(2G_*5(3J^es9UKZ5U7?Kb1?BpwAdXB-$kcaET%w{b>n<(1{Ee$bimfZ!ZDeLF} diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 61880dda..4b0f253c 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -28,7 +28,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { apptainer run --nv -B /mnt/af2,/srv \ --env blast_path="${params.blast_path}" \ --env bfd_path="${params.bfd_path}" \ - --env uniref30_path="${params.uniref30_variable}" \ + --env uniref30_path="${params.uniref30}" \ --env pdb100="${params.pdb100_path}" \ RoseTTAFold_All_Atom.sif "$fasta" diff --git a/workflows/.rosettafold_all_atom.nf.swp b/workflows/.rosettafold_all_atom.nf.swp deleted file mode 100644 index 3c05258597046fca78e3557aef04334eb5964854..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHNTWl0n7(R-isEENJ`l5$|bc`gL|@{>wSv z`OiOdF5_JX2L|cRVi&=2Hz99U@0(b?@23y8JhzLy^l0~vo!#A!u}4cCQ(CCGTK7^S z@iQgxr+eUiNy0I35@|I*kG9Vd{3`hnf1CjyBfMh^2@Ly#h ztlvzIqjER%3LfI;n{wwB{CWdFKa+dTFXc%xAQ_MhNCqSWk^#wpWI!??8ITM}1|$QL zfqx+bnoh_&DA)aL0s!m(V*Gz)Eg|m$$AE``Hee(00PyEsg!~S?4ZH=s0qh3W183F{ zavJy;I0d{690z)V9$+u98dwERJ30BqnzU@y=PYylnwzPpW(Z-LK%_kfc?4cGwO3#FOsslnd9J8~IPa#y$h2lhM!`u`dK(&}#?g>fg^|VTSBnB{JyE$}*7jn> zU0egq)iqypz!$gP!YtiGO-9$%B|Fg$Mr~@q>^o`&VNjx>9~m9Al<2o@#?G3Iv}|v}Yxokb(&pih_oPu{ 
z`ku323nvTthKwaUV5*eLHo{bRSQu6{&o!-yw2rA`e*YRAb4k|hBD9xwoOt9#YP4Ef zvzBoqIzy~{&5t84Wj)1XG`k-}X)#fU~4e!-O$Y zfkocM<01*wgS@1p-qYp53QJw0>>aTjDoSyHh@PZ!RC9G2XwwEfB;Kje5DM`*V~H7v zv%V0F6aMhnkUCr)=^Gs__g5>uYPnKT%cIr7B8Vso$)pg?EeBgm>hib|Zx5t;R)FL` z`vq$(MRpQ)v6v-Vh#xnqKQgxOP<8mp{z~;wi4I3D9nS1AEMt#h8Ht_-*vqJEzD^xa zH-v@w3}3VnDXak5;7I!q4+hJvM`2QA-Y?F^v7*22*pL@TE1a+ zcQK{(K(TpAODGA_ppZ&idEC@f>Ga0}HAtlzB2ZFk_9LtDY~o7DvEI=^pC?Q#)TRv7 zM!XMbt7t^_AX^o3Y?+R@9Ed?WSRNRnBEXi%dOdwZXji~{(hnJNg%Zsi%JlU z3wi>@_G+4~M4@E|s%tLDMj~;wXFP};aj+@PEb75Ew=uIhA3Pyu?IztsywsET3tDus zl3%A?tqY2AZU=AOEmaegn#CE(A@JE{xL=TcbzN##Df*Nj!zF^N%&!nOp z$PN*D8Pm*=H8@ro9XQaFdISGlka>)W;kiv=9Z@(IZ>Rwm*#{rYI>mbgUv6+2Cxlct z<8--0dSGyWb$FBx^>6E`4)^hw=!lZxXyir(`Jx%Vsbw=bKuG(rFd7kx(r?Cth zGc8&xu@JZJ4I;Wy?I~AoJk4zUzXId$Ie?A-#rpm$80YT-{=%64GH?Mn4V(a80CoTm z1HWP%e+l>jI1juA>;u?1e-E$=xB%N<0mrzX*O@PQNCqSWk^#wpWI!??8ITM}1|$Rj zCkB>#U^V(gAMG3JUEWb@GCeHjrs4rlG(M$0p34;aDN&$P(l8{LCI?W(?EWbUuL*&)_CEi=kJ+JV?CU#=l8X)i From ef6f516b456d3eb61880dddbc7fbbbaeec1dece5 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 23 Oct 2024 15:08:58 +1100 Subject: [PATCH 066/135] Runs through rfaa -profile test and -stub successfully --- main.nf | 1 + modules/local/run_rosettafold_all_atom.nf | 27 +++++---- workflows/rosettafold_all_atom.nf | 74 +++++++++++------------ 3 files changed, 50 insertions(+), 52 deletions(-) diff --git a/main.nf b/main.nf index 269e1c06..bb8418df 100644 --- a/main.nf +++ b/main.nf @@ -225,6 +225,7 @@ workflow NFCORE_PROTEINFOLD { // WORKFLOW: Run nf-core/rosettafold_all_atom workflow // ROSETTAFOLD_ALL_ATOM ( + ch_samplesheet, ch_versions, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 4b0f253c..c347438e 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -10,7 +10,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { 
error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "RoseTTAFold_All_Atom.sif" +// container "RoseTTAFold_All_Atom.sif" input: tuple val(meta), path(fasta) @@ -25,18 +25,21 @@ process RUN_ROSETTAFOLD_ALL_ATOM { script: """ - apptainer run --nv -B /mnt/af2,/srv \ - --env blast_path="${params.blast_path}" \ - --env bfd_path="${params.bfd_path}" \ - --env uniref30_path="${params.uniref30}" \ - --env pdb100="${params.pdb100_path}" \ - RoseTTAFold_All_Atom.sif "$fasta" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS + apptainer exec -B /mnt/af2,/srv /srv/scratch/z5378336/RoseTTAFold_All_Atom.sif ls """ + +// apptainer run --nv -B /mnt/af2,/srv \ +// --env blast_path="${params.blast_path}" \ +// --env bfd_path="${params.bfd_path}" \ +// --env uniref30_path="${params.uniref30_rosettafold_all_atom_path}" \ +// --env pdb100="${params.pdb100_path}" \ +// /srv/scratch/z5378336/RoseTTAFold_All_Atom.sif "${fasta}" +// cat <<-END_VERSIONS > versions.yml +// +// "${task.process}": +// python: \$(python3 --version | sed 's/Python //g') +// END_VERSIONS +// """ stub: """ diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 69de0837..2eb6e88e 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -23,7 +23,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' // // SUBWORKFLOW: Consisting entirely of nf-core/modules // -include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -37,6 +37,7 @@ include { 
methodsDescriptionText } from '../subworkflows/local/utils_nfcore_prot workflow ROSETTAFOLD_ALL_ATOM { take: + ch_samplesheet ch_versions // channel: [ path(versions.yml) ] ch_bfd // channel: path(bfd) ch_uniref30 // channel: path(uniref30) @@ -45,19 +46,12 @@ workflow ROSETTAFOLD_ALL_ATOM { main: ch_multiqc_files = Channel.empty() - - // - // Create input channel from input file provided through params.input + // - Channel - .fromPath(params.input) - .set { ch_file } - - // - // SUBWORKFLOW: Run Rosettafold_All_Atom standard mode + // SUBWORKFLOW: Run Rosettafold_All_Atom // RUN_ROSETTAFOLD_ALL_ATOM ( - ch_file, + ch_samplesheet ) ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) @@ -69,37 +63,37 @@ workflow ROSETTAFOLD_ALL_ATOM { .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) .set { ch_collated_versions } -// // -// // MODULE: MultiQC -// // -// ch_multiqc_report = Channel.empty() -// if (!params.skip_multiqc) { -// ch_multiqc_report = Channel.empty() -// ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -// ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() -// ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() -// summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") -// ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) -// ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) -// ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) -// -// ch_multiqc_files = Channel.empty() -// ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) -// ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) -// ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) -// ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) -// -// MULTIQC ( -// ch_multiqc_files.collect(), -// ch_multiqc_config.toList(), -// ch_multiqc_custom_config.toList(), -// ch_multiqc_logo.toList() -// ) -// ch_multiqc_report = MULTIQC.out.report.toList() -// } + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } emit: -// multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 1f6e1dd6708cf81af84f7788bc5943e331b53aa6 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 23 Oct 2024 16:57:04 +1100 Subject: [PATCH 067/135] modified: modules/local/run_rosettafold_all_atom.nf --- modules/local/run_rosettafold_all_atom.nf | 25 ++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index c347438e..ba396f32 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -25,21 +25,18 @@ process RUN_ROSETTAFOLD_ALL_ATOM { script: """ - apptainer exec -B /mnt/af2,/srv /srv/scratch/z5378336/RoseTTAFold_All_Atom.sif ls + apptainer run --nv -B /mnt/af2,/srv \ + --env blast_path="${params.blast_path}" \ + --env bfd_path="${params.bfd_path}" \ + --env uniref30_path="${params.uniref30_rosettafold_all_atom_path}" \ + --env pdb100="${params.pdb100_path}" \ + 
/srv/scratch/z5378336/RoseTTAFold_All_Atom.sif "${fasta}" + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + python: \$(python3 --version | sed 's/Python //g') + END_VERSIONS """ - -// apptainer run --nv -B /mnt/af2,/srv \ -// --env blast_path="${params.blast_path}" \ -// --env bfd_path="${params.bfd_path}" \ -// --env uniref30_path="${params.uniref30_rosettafold_all_atom_path}" \ -// --env pdb100="${params.pdb100_path}" \ -// /srv/scratch/z5378336/RoseTTAFold_All_Atom.sif "${fasta}" -// cat <<-END_VERSIONS > versions.yml -// -// "${task.process}": -// python: \$(python3 --version | sed 's/Python //g') -// END_VERSIONS -// """ stub: """ From 4b68ed84406cdeeac7cefb0209adc30c13987365 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 28 Oct 2024 14:54:14 +1100 Subject: [PATCH 068/135] Debugging RFAA --- main.nf | 4 ++-- modules/local/.run_rosettafold_all_atom.nf.swp | Bin 0 -> 12288 bytes modules/local/run_rosettafold_all_atom.nf | 16 +++++++++------- .../local/prepare_rosettafold_all_atom_dbs.nf | 4 +--- workflows/rosettafold_all_atom.nf | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) create mode 100644 modules/local/.run_rosettafold_all_atom.nf.swp diff --git a/main.nf b/main.nf index bb8418df..b0c018ee 100644 --- a/main.nf +++ b/main.nf @@ -217,7 +217,7 @@ workflow NFCORE_PROTEINFOLD { params.bfd_path, params.uniref30_rosettafold_all_atom_path, params.pdb100_path, - params.blast_path, + params.blast_path ) ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) @@ -230,7 +230,7 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 ) // ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) diff --git 
a/modules/local/.run_rosettafold_all_atom.nf.swp b/modules/local/.run_rosettafold_all_atom.nf.swp new file mode 100644 index 0000000000000000000000000000000000000000..6c002c464247937e4714ea085caab84db3272681 GIT binary patch literal 12288 zcmeI2&2QX97>B2D00K!72(G-CNc*9zopjX@ZJSD4vMrI)B-$iJf}+)|J>Ip}U-gXh z)kNGX(Oi(Yz?C8{NZgC~54a-1fdf}Ah#My)e*V}bK$>y{G$TE-*M2?o&NHtqMVbEW z?d4mvV9qkM7a6-h@!H_o3G1szcV^lC<>s3U&E|Y|>%2DNiif;;UPyk=lF7Cu?Sw0P zXpK9!;v`bS3*E?ftRQkyUr5W3Z0=i08upS%3Z-~l&0<8^xZrL9u|`cxax2DAZfKpW5ov;l2E8_)*+?*>$Sfqj5UK2=Wi za(TS)mt*-;AG85&KpW5ov;l2E8_)){0c}7V&<3;tZQws>fIEzR`W#~)P9S;w|9|}V z|L+$W`w9FA9)T~w5qJQugI}f?3%~%l;8pMnm;gUdGWH$#8axC?;6B&~?}B+S1t!7M z;D;9&`yPA?c0ml@23Np5xCo}eZ_hLK4fql~1fPLVz(?R6U;z_61AfJPJ_jFzBaqGS z0nkkw&<3;tZ9p5)2DAZf;9p|kkP(_;82PcYy41Vf>255qt#0HOvYUKo)S^4n)d5bZ z(byJ=^rG;9hJriPn4<%d!l8<^Zmw=o8!?u3g`_n@?e*oBS+}f8E$_>izNfexnb>)1 z3n>qcR({#$imqO5oF1C4Q8|h<_X3~gaVkpfA(~kqrc_0prUU1homE3+( zoO$N9*4XJ6`IQo+O8c#1%yYfOdukYkxtij%d1B{8%6c9JCxgdp_@^ESEsymjHT)r2 z`43?aBQlmc*SBuA=&dMJB2EM9=bR$naizuJU5+&pGvKuob0#CJ$YaxH6h^vp6{5U8`edXP?PhzU$98U_RZWNUm zr7BL5E}@l5@@}gY)WY2CtX0Qa6{^bXeIF(F_sePMCBki<^?>W-;n@%>jeSAe+^+^g z@!CwjDAl}(i+sE=Pp2Fbo)z~(k&tor)@>fgMS!%r*r&a@*}j2&+KsmFH`>^z-EhRV zDLvOHYPfjAvphtSL`l_H$#!{DcWbrRUEAnnPp_>k_1Y^dz4qqXEh;vHlM@L^VWdc= zaU3Not1O3`wC)SMca(yq$Q~g}i#EJ)kor9F)E<7VM^3;)@gna zw3B@TT+bJ#ibf*z_C>WaQ>mFAN;i?hR@L_^kl^sj3{~=4t-N2uj1X6&Yl}2jqkAly z5SFO3>nTFtDAo{JkQ#b&JcS((CDka$qP$0>cYGo5xp3t3zVNA1ewliKaJ)3A0x<#+47*`>g*4VP3QXn literal 0 HcmV?d00001 diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index ba396f32..1f8c7a6b 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -10,10 +10,14 @@ process RUN_ROSETTAFOLD_ALL_ATOM { error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } -// container "RoseTTAFold_All_Atom.sif" +// container "/srv/scratch/z5378336/apptainers/RoseTTAFold-All-Atom-dev.sif" input: tuple val(meta), path(fasta) +// path ('bfd/*') +// path ('uniref30/*') +// path ('blast/*') +// path ('pdb100/*') output: path ("${fasta.baseName}*") @@ -24,13 +28,11 @@ process RUN_ROSETTAFOLD_ALL_ATOM { task.ext.when == null || task.ext.when script: +// mamba run --name RFAA python -m rf2aa.run_inference --config-name "${fasta}" """ - apptainer run --nv -B /mnt/af2,/srv \ - --env blast_path="${params.blast_path}" \ - --env bfd_path="${params.bfd_path}" \ - --env uniref30_path="${params.uniref30_rosettafold_all_atom_path}" \ - --env pdb100="${params.pdb100_path}" \ - /srv/scratch/z5378336/RoseTTAFold_All_Atom.sif "${fasta}" + echo "DEBUG: Contents of bfd path:" + ls -lh / + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf index 84e01a67..4a281c0e 100644 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -12,7 +12,6 @@ workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { pdb100_path main: - ch_versions = Channel.empty() ch_bfd = Channel.value(file(bfd_path)) ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path, type: 'any')) ch_blast = Channel.value(file(blast_path, type: 'string')) @@ -21,7 +20,6 @@ workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { emit: bfd = ch_bfd uniref30 = ch_uniref30 - pdb100 = ch_pdb100 blast = ch_blast - versions = ch_versions + pdb100 = ch_pdb100 } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 2eb6e88e..7028b1fc 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -39,10 +39,10 @@ workflow ROSETTAFOLD_ALL_ATOM { take: ch_samplesheet ch_versions // channel: [ path(versions.yml) ] - 
ch_bfd // channel: path(bfd) - ch_uniref30 // channel: path(uniref30) - ch_blast - ch_pdb100 +// ch_bfd // channel: path(bfd) +// ch_uniref30 // channel: path(uniref30) +// ch_blast +// ch_pdb100 main: ch_multiqc_files = Channel.empty() From 0eba56dbedd523c98693ebd137c312937a195e70 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 28 Oct 2024 14:54:32 +1100 Subject: [PATCH 069/135] Debugging RFAA --- modules/local/.run_rosettafold_all_atom.nf.swp | Bin 12288 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 modules/local/.run_rosettafold_all_atom.nf.swp diff --git a/modules/local/.run_rosettafold_all_atom.nf.swp b/modules/local/.run_rosettafold_all_atom.nf.swp deleted file mode 100644 index 6c002c464247937e4714ea085caab84db3272681..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&2QX97>B2D00K!72(G-CNc*9zopjX@ZJSD4vMrI)B-$iJf}+)|J>Ip}U-gXh z)kNGX(Oi(Yz?C8{NZgC~54a-1fdf}Ah#My)e*V}bK$>y{G$TE-*M2?o&NHtqMVbEW z?d4mvV9qkM7a6-h@!H_o3G1szcV^lC<>s3U&E|Y|>%2DNiif;;UPyk=lF7Cu?Sw0P zXpK9!;v`bS3*E?ftRQkyUr5W3Z0=i08upS%3Z-~l&0<8^xZrL9u|`cxax2DAZfKpW5ov;l2E8_)*+?*>$Sfqj5UK2=Wi za(TS)mt*-;AG85&KpW5ov;l2E8_)){0c}7V&<3;tZQws>fIEzR`W#~)P9S;w|9|}V z|L+$W`w9FA9)T~w5qJQugI}f?3%~%l;8pMnm;gUdGWH$#8axC?;6B&~?}B+S1t!7M z;D;9&`yPA?c0ml@23Np5xCo}eZ_hLK4fql~1fPLVz(?R6U;z_61AfJPJ_jFzBaqGS z0nkkw&<3;tZ9p5)2DAZf;9p|kkP(_;82PcYy41Vf>255qt#0HOvYUKo)S^4n)d5bZ z(byJ=^rG;9hJriPn4<%d!l8<^Zmw=o8!?u3g`_n@?e*oBS+}f8E$_>izNfexnb>)1 z3n>qcR({#$imqO5oF1C4Q8|h<_X3~gaVkpfA(~kqrc_0prUU1homE3+( zoO$N9*4XJ6`IQo+O8c#1%yYfOdukYkxtij%d1B{8%6c9JCxgdp_@^ESEsymjHT)r2 z`43?aBQlmc*SBuA=&dMJB2EM9=bR$naizuJU5+&pGvKuob0#CJ$YaxH6h^vp6{5U8`edXP?PhzU$98U_RZWNUm zr7BL5E}@l5@@}gY)WY2CtX0Qa6{^bXeIF(F_sePMCBki<^?>W-;n@%>jeSAe+^+^g z@!CwjDAl}(i+sE=Pp2Fbo)z~(k&tor)@>fgMS!%r*r&a@*}j2&+KsmFH`>^z-EhRV zDLvOHYPfjAvphtSL`l_H$#!{DcWbrRUEAnnPp_>k_1Y^dz4qqXEh;vHlM@L^VWdc= zaU3Not1O3`wC)SMca(yq$Q~g}i#EJ)kor9F)E<7VM^3;)@gna 
zw3B@TT+bJ#ibf*z_C>WaQ>mFAN;i?hR@L_^kl^sj3{~=4t-N2uj1X6&Yl}2jqkAly z5SFO3>nTFtDAo{JkQ#b&JcS((CDka$qP$0>cYGo5xp3t3zVNA1ewliKaJ)3A0x<#+47*`>g*4VP3QXn From 9a15bdf2498c5d1080fb2d4482b3681e72aac95d Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 29 Oct 2024 17:36:44 +1100 Subject: [PATCH 070/135] RFAA now working to produce structures --- conf/dbs.config | 5 +++-- main.nf | 4 ++-- modules/local/run_rosettafold_all_atom.nf | 19 ++++++++++--------- .../local/prepare_rosettafold_all_atom_dbs.nf | 12 +++++++----- workflows/rosettafold_all_atom.nf | 14 +++++++++----- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 4a3becfc..336b9e14 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -91,8 +91,9 @@ params { pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' // RoseTTAFold paths - uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/uniref30/UniRef30_2021_03" - pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/pdb100_2021Mar03" +// uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/UniRef30_2020_06/*" + uniref30_rosettafold_all_atom_path = "/srv/scratch/sbf/UniRef30_2020_06/*" + pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" // Esmfold links diff --git a/main.nf b/main.nf index b0c018ee..3f95d082 100644 --- a/main.nf +++ b/main.nf @@ -227,10 +227,10 @@ workflow NFCORE_PROTEINFOLD { ROSETTAFOLD_ALL_ATOM ( ch_samplesheet, ch_versions, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast ) // ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) diff --git 
a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 1f8c7a6b..d99fb390 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -10,14 +10,14 @@ process RUN_ROSETTAFOLD_ALL_ATOM { error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") } -// container "/srv/scratch/z5378336/apptainers/RoseTTAFold-All-Atom-dev.sif" + container "/srv/scratch/z5378336/RoseTTAFold_All_Atom.sif" input: tuple val(meta), path(fasta) -// path ('bfd/*') -// path ('uniref30/*') -// path ('blast/*') -// path ('pdb100/*') + path ('bfd/*') + path ('UniRef30_2020_06/*') + path ('pdb100_2021Mar03/*') + path ('blast-2.2.26/*') output: path ("${fasta.baseName}*") @@ -28,11 +28,12 @@ process RUN_ROSETTAFOLD_ALL_ATOM { task.ext.when == null || task.ext.when script: -// mamba run --name RFAA python -m rf2aa.run_inference --config-name "${fasta}" """ - echo "DEBUG: Contents of bfd path:" - ls -lh / - + ln -s /app/RoseTTAFold-All-Atom/make_msa.sh . 
+ mamba run --name RFAA python -m rf2aa.run_inference \ + --config-dir $PWD --config-path $PWD \ + --config-name "${fasta}" + cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf index 4a281c0e..51109003 100644 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -8,18 +8,20 @@ workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { take: bfd_path // directory: /path/to/bfd/ uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ - blast_path pdb100_path + blast_path main: - ch_bfd = Channel.value(file(bfd_path)) - ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path, type: 'any')) - ch_blast = Channel.value(file(blast_path, type: 'string')) + ch_bfd = Channel.value(file(bfd_path, type: 'string')) + ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path)) ch_pdb100 = Channel.value(file(pdb100_path)) + ch_blast = Channel.value(file(blast_path)) + ch_versions = Channel.empty() emit: bfd = ch_bfd uniref30 = ch_uniref30 - blast = ch_blast pdb100 = ch_pdb100 + blast = ch_blast + versions = ch_versions } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 7028b1fc..3433d549 100644 --- a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -39,10 +39,10 @@ workflow ROSETTAFOLD_ALL_ATOM { take: ch_samplesheet ch_versions // channel: [ path(versions.yml) ] -// ch_bfd // channel: path(bfd) -// ch_uniref30 // channel: path(uniref30) -// ch_blast -// ch_pdb100 + ch_bfd // channel: path(bfd) + ch_uniref30 // channel: path(uniref30) + ch_pdb100 + ch_blast main: ch_multiqc_files = Channel.empty() @@ -51,7 +51,11 @@ workflow ROSETTAFOLD_ALL_ATOM { // SUBWORKFLOW: Run Rosettafold_All_Atom // RUN_ROSETTAFOLD_ALL_ATOM ( - ch_samplesheet + ch_samplesheet, + ch_bfd, + ch_uniref30, + 
ch_pdb100, + ch_blast ) ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) From 51878df350f4f2b3fdd8b72953ae2160db9efbb5 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 30 Oct 2024 16:57:14 +1100 Subject: [PATCH 071/135] Modified rfaa output to properly emit PDB file --- modules/local/run_rosettafold_all_atom.nf | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index d99fb390..061e485e 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -30,12 +30,20 @@ process RUN_ROSETTAFOLD_ALL_ATOM { script: """ ln -s /app/RoseTTAFold-All-Atom/make_msa.sh . - mamba run --name RFAA python -m rf2aa.run_inference \ - --config-dir $PWD --config-path $PWD \ - --config-name "${fasta}" - cat <<-END_VERSIONS > versions.yml + mamba run --name RFAA python -m rf2aa.run_inference \ + --config-dir $PWD \ + --config-path $PWD \ + --config-name "${fasta}" + + cp "${fasta.baseName}"/*.pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb + cd "${fasta.baseName}" + awk '{print \$6"\\t"\$11}' "${fasta.baseName}".rosettafold_all_atom.pdb | uniq > plddt.tsv + echo -e Positions"\\t" > header.tsv + cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv + cd .. 
+ cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') END_VERSIONS From 076db5a0c04d632390f1825ddb564eb362a3af62 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 30 Oct 2024 17:45:20 +1100 Subject: [PATCH 072/135] Fixed renaming pdb --- modules/local/run_rosettafold_all_atom.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 061e485e..d72a9cbc 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -36,8 +36,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { --config-path $PWD \ --config-name "${fasta}" - cp "${fasta.baseName}"/*.pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb - cd "${fasta.baseName}" + cp "${fasta.baseName}".pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb awk '{print \$6"\\t"\$11}' "${fasta.baseName}".rosettafold_all_atom.pdb | uniq > plddt.tsv echo -e Positions"\\t" > header.tsv cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv From c5231391fd55a683d32dcc7686aeeb1a1c50783a Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 1 Nov 2024 16:34:25 +1100 Subject: [PATCH 073/135] Pipeline now completes successfully --- conf/dbs.config | 5 ++--- main.nf | 6 ++---- modules/local/run_rosettafold_all_atom.nf | 11 +++++------ .../local/prepare_rosettafold_all_atom_dbs.nf | 5 +---- workflows/rosettafold_all_atom.nf | 8 ++++---- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 336b9e14..0b8308ce 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -45,7 +45,6 @@ params { // Alphafold paths - bfd_path = "${params.alphafold2_db}/${bfd_name}/*" small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" alphafold2_params_path = "${params.alphafold2_db}/${alphafold_params_name}/*" mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" @@ -91,10 +90,10 @@ params { pdb100 = 
'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' // RoseTTAFold paths -// uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/UniRef30_2020_06/*" - uniref30_rosettafold_all_atom_path = "/srv/scratch/sbf/UniRef30_2020_06/*" + uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/uniref30/UniRef30_2020_06/*" pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" + bfd_path = "${params.rosettafold_all_atom_db}/bfd/*" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/main.nf b/main.nf index 3f95d082..cf63dcb0 100644 --- a/main.nf +++ b/main.nf @@ -216,8 +216,7 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( params.bfd_path, params.uniref30_rosettafold_all_atom_path, - params.pdb100_path, - params.blast_path + params.pdb100_path ) ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) @@ -229,8 +228,7 @@ workflow NFCORE_PROTEINFOLD { ch_versions, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.blast + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 ) // ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index d72a9cbc..b70bd34e 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -17,11 +17,11 @@ process RUN_ROSETTAFOLD_ALL_ATOM { path ('bfd/*') path ('UniRef30_2020_06/*') path ('pdb100_2021Mar03/*') - path ('blast-2.2.26/*') output: path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc + tuple val(meta), path ("${fasta.baseName}*pdb"), emit: pdb + tuple val(meta), path 
("*_mqc.tsv"), emit: multiqc path "versions.yml", emit: versions when: @@ -29,7 +29,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { script: """ - ln -s /app/RoseTTAFold-All-Atom/make_msa.sh . + ln -s /app/RoseTTAFold-All-Atom/* . mamba run --name RFAA python -m rf2aa.run_inference \ --config-dir $PWD \ @@ -39,8 +39,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { cp "${fasta.baseName}".pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb awk '{print \$6"\\t"\$11}' "${fasta.baseName}".rosettafold_all_atom.pdb | uniq > plddt.tsv echo -e Positions"\\t" > header.tsv - cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv - cd .. + cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -55,7 +54,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + python: \$(python3 --version | sed 's/Python //g') END_VERSIONS """ } diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf index 51109003..23a844e8 100644 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -9,19 +9,16 @@ workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { bfd_path // directory: /path/to/bfd/ uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ pdb100_path - blast_path main: - ch_bfd = Channel.value(file(bfd_path, type: 'string')) + ch_bfd = Channel.value(file(bfd_path)) ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path)) ch_pdb100 = Channel.value(file(pdb100_path)) - ch_blast = Channel.value(file(blast_path)) ch_versions = Channel.empty() emit: bfd = ch_bfd uniref30 = ch_uniref30 pdb100 = ch_pdb100 - blast = ch_blast versions = ch_versions } diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf index 3433d549..4861e35d 100644 --- 
a/workflows/rosettafold_all_atom.nf +++ b/workflows/rosettafold_all_atom.nf @@ -42,7 +42,6 @@ workflow ROSETTAFOLD_ALL_ATOM { ch_bfd // channel: path(bfd) ch_uniref30 // channel: path(uniref30) ch_pdb100 - ch_blast main: ch_multiqc_files = Channel.empty() @@ -54,8 +53,7 @@ workflow ROSETTAFOLD_ALL_ATOM { ch_samplesheet, ch_bfd, ch_uniref30, - ch_pdb100, - ch_blast + ch_pdb100 ) ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) @@ -91,7 +89,9 @@ workflow ROSETTAFOLD_ALL_ATOM { ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) ch_multiqc_report = MULTIQC.out.report.toList() } From 70e8b6b37dc1e358a0bbbf240911b442dc842902 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 1 Nov 2024 16:48:49 +1100 Subject: [PATCH 074/135] Cleaned up test configs --- nextflow.config | 44 +------------------------------------------- nextflow.config.1 | 0 2 files changed, 1 insertion(+), 43 deletions(-) create mode 100644 nextflow.config.1 diff --git a/nextflow.config b/nextflow.config index 0a0e374d..fc0b1172 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,48 +20,6 @@ params { full_dbs = false // true full_dbs, false reduced_dbs alphafold2_model_preset = "monomer" // for AF2 {monomer (default), monomer_casp14, monomer_ptm, multimer} alphafold2_db = null - - // Database prefixes - bfd_prefix = null - smallbfd_prefix = null - mgnify_prefix = null - pdb70_prefix = null - pdb_mmcif_prefix = null - uniref30_prefix = null - uniref90_prefix = null - pdb_seq_prefix = null - uniprot_prefix = null - alphafold_params_prefix = null - mmcif_path = null - mmcif_obsolete = null - uniref30_db = null - bfd_first_non_consensus_sequences = null - uniprot_fasta = null - pdb_seqres_txt = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt = null - uniref90_fasta = null - mgy_clusters_fasta = null - 
uniref30_prefix = null - - bfd_name = null - smallbfd_name = null - mgnify_name = null - pdb70_name = null - pdb_mmcif_name = null - uniref30_name = null - uniref90_name = null - pdb_seqres_name = null - uniprot_name = null - alphafold_params_name = null - mmcif_files_name = null - mmcif_obsolete_name = null - uniref30_db_name = null - bfd_first_non_consensus_sequences_name = null - uniprot_fasta_name = null - pdb_seqres_txt_name = null - bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = null - uniref90_fasta_name = null - mgy_clusters_fasta_name = null // Alphafold2 links bfd_link = null @@ -172,7 +130,7 @@ params { } // Load base.config by default for all pipelines -includeConfig 'conf/katana.config' +includeConfig 'conf/base.config' profiles { debug { diff --git a/nextflow.config.1 b/nextflow.config.1 new file mode 100644 index 00000000..e69de29b From 902ebaf02b31d55dddaa482b611bda5d982a224c Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 1 Nov 2024 17:01:56 +1100 Subject: [PATCH 075/135] Built schema as per CONTRIBUTING.md --- conf/dbs.config | 81 +++++------------- nextflow.config.1 | 0 nextflow_schema.json | 198 +++++++++++++------------------------------ 3 files changed, 82 insertions(+), 197 deletions(-) delete mode 100644 nextflow.config.1 diff --git a/conf/dbs.config b/conf/dbs.config index 0b8308ce..6b4f6ddb 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -9,67 +9,30 @@ params { // AlphaFold2 links - bfd = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' - small_bfd = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' - alphafold2_params = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar' - mgnify = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' - pdb70 = 
'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' - pdb_mmcif = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ - pdb_obsolete = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' - uniref30 = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniref30_2018_08_hhsuite.tar.gz' - uniref90 = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' - pdb_seqres = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' - uniprot_sprot = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' - uniprot_trembl = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - - bfd_name = params.bfd_prefix ?: 'bfd' - smallbfd_name = params.smallbfd_prefix ?: 'smallbfd' - mgnify_name = params.mgnify_prefix ?: 'mgnify' - pdb70_name = params.pdb70_prefix ?: 'pdb70' - pdb_mmcif_name = params.pdb_mmcif_prefix ?: 'pdb_mmcif' - uniref30_name = params.uniref30_prefix ?: 'uniref30' - uniref90_name = params.uniref90_prefix ?: 'uniref90' - pdb_seqres_name = params.pdb_seq_prefix ?: 'pdb_seqres' - uniprot_name = params.uniprot_prefix ?: 'uniprot' - alphafold_params_name = params.alphafold_params_prefix ?: 'params/alphafold_params_*' - mmcif_files_name = params.mmcif_path ?: 'pdb_mmcif/mmcif_files/' - mmcif_obsolete_name = params.mmcif_obsolete ?: 'pdb_mmcif/obsolete.dat' - - uniref30_db_name = params.uniref30_db ?: 'uniref30_2018_08' - bfd_first_non_consensus_sequences_name = params.bfd_first_non_consensus_sequences ?: 'bfd-first_non_consensus_sequences.fasta' - uniprot_fasta_name = params.uniprot_fasta ?: 'uniprot.fasta' - pdb_seqres_txt_name = params.pdb_seqres_txt ?: 'pdb_seqres.txt' - 
bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name = params.bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt ?: 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt' - uniref90_fasta_name = params.uniref90_fasta ?: 'uniref90.fasta' - mgy_clusters_fasta_name = params.mgy_clusters_fasta ?: 'mgy_clusters_2022_05.fa' - + bfd_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' + small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' + alphafold2_params_link = 'https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar' + mgnify_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/mgy_clusters_2022_05.fa.gz' + pdb70_link = 'http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz' + pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' //Other sources available: 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ + pdb_obsolete_link = 'https://files.wwpdb.org/pub/pdb/data/status/obsolete.dat' + uniref30_alphafold2_link = 'https://storage.googleapis.com/alphafold-databases/v2.3/UniRef30_2021_03.tar.gz' + uniref90_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' + pdb_seqres_link = 'https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' + uniprot_sprot_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' + uniprot_trembl_link = 'https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' // Alphafold paths - small_bfd_path = "${params.alphafold2_db}/${smallbfd_name}/*" - alphafold2_params_path = 
"${params.alphafold2_db}/${alphafold_params_name}/*" - mgnify_path = "${params.alphafold2_db}/${mgnify_name}/*" - pdb70_path = "${params.alphafold2_db}/${pdb70_name}/**" - pdb_mmcif_path = "${params.alphafold2_db}/${pdb_mmcif_name}/**" - uniref30_alphafold2_path = "${params.alphafold2_db}/${uniref30_name}/**" - uniref90_path = "${params.alphafold2_db}/${uniref90_name}/*" - pdb_seqres_path = "${params.alphafold2_db}/${pdb_seqres_name}/*" - uniprot_path = "${params.alphafold2_db}/${uniprot_name}/*" - - // Alphafold variables - bfd_dir_path = "${params.alphafold2_db}/${bfd_name}/" - small_bfd_dir_path = "${params.alphafold2_db}/${smallbfd_name}/" - mgnify_dir_path = "${params.alphafold2_db}/${mgnify_name}/" - pdb70_dir_path = "${params.alphafold2_db}/${pdb70_name}/" - pdb_mmcif_dir_path = "${params.alphafold2_db}/${pdb_mmcif_name}/" - uniref30_dir_path = "${params.alphafold2_db}/${uniref30_name}/" - uniref90_dir_path = "${params.alphafold2_db}/${uniref90_name}/" - pdb_seqres_dir_path = "${params.alphafold2_db}/${pdb_seqres_name}/" - uniprot_dir_path = "${params.alphafold2_db}/${uniprot_name}/" - - // Alphafold MSA Variables - mgnify_database_path = "${params.alphafold2_db}/${mgnify_name}/" - template_mmcif_dir = "${params.alphafold2_db}/${mmcif_files_name}/" - obsolete_pdbs_path = "${params.alphafold2_db}/${mmcif_obsolete_name}" + bfd_path = "${params.alphafold2_db}/bfd/*" + small_bfd_path = "${params.alphafold2_db}/small_bfd/*" + alphafold2_params_path = "${params.alphafold2_db}/alphafold_params_*/*" + mgnify_path = "${params.alphafold2_db}/mgnify/*" + pdb70_path = "${params.alphafold2_db}/pdb70/**" + pdb_mmcif_path = "${params.alphafold2_db}/pdb_mmcif/*" + uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" + uniref90_path = "${params.alphafold2_db}/uniref90/*" + pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" + uniprot_path = "${params.alphafold2_db}/uniprot/*" // Colabfold links colabfold_db_link = 
'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' diff --git a/nextflow.config.1 b/nextflow.config.1 deleted file mode 100644 index e69de29b..00000000 diff --git a/nextflow_schema.json b/nextflow_schema.json index 1895746a..203e6c8c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -416,26 +416,22 @@ "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref30/**" + "fa_icon": "fas fa-folder-open" }, "uniref90_path": { "type": "string", "description": "Path to the UniRef90 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref90/*" + "fa_icon": "fas fa-folder-open" }, "pdb_seqres_path": { "type": "string", "description": "Path to the PDB SEQRES database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb_seqres/*" + "fa_icon": "fas fa-folder-open" }, "uniprot_path": { "type": "string", "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", - "fa_icon": "fas fa-folder-open", - "default": "null/uniprot/*" + "fa_icon": "fas fa-folder-open" } } }, @@ -473,14 +469,12 @@ "colabfold_db_path": { "type": "string", "description": "Link to the Colabfold database", - "fa_icon": "fas fa-folder-open", - "default": "null/colabfold_envdb_202108" + "fa_icon": "fas fa-folder-open" }, "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-folder-open", - "default": "null/uniref30_2302" + "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_path": { "type": "string", @@ -530,8 +524,7 @@ "esmfold_params_path": { "type": "string", "description": "Link to the Esmfold parameters", - "fa_icon": "fas fa-folder-open", - "default": "null/*" + "fa_icon": "fas fa-folder-open" } } }, @@ -680,62 +673,79 @@ } ], "properties": { - "bfd_prefix": { + "rosettafold_all_atom_db": { "type": "string" }, - "smallbfd_prefix": { - "type": 
"string" + "uniref30_rosettafold_all_atom_path": { + "type": "string", + "default": "null/uniref30/UniRef30_2020_06/*" }, - "mgnify_prefix": { - "type": "string" + "blast_path": { + "type": "string", + "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" }, - "pdb70_prefix": { - "type": "string" + "pdb100_path": { + "type": "string", + "default": "null/pdb100_2021Mar03/*" }, - "pdb_mmcif_prefix": { + "RFAA_paper_weights_path": { "type": "string" }, - "uniref30_prefix": { - "type": "string" + "cpuQueue": { + "type": "string", + "default": "submission" }, - "uniref90_prefix": { - "type": "string" + "gpuQueue": { + "type": "string", + "default": "mwacgpu2" }, - "pdb_seq_prefix": { - "type": "string" + "bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" }, - "uniprot_prefix": { - "type": "string" + "small_bfd": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" }, - "alphafold_params_prefix": { - "type": "string" + "alphafold2_params": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" }, - "mmcif_path": { - "type": "string" + "mgnify": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" }, - "mmcif_obsolete": { - "type": "string" + "pdb70": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" }, - "uniref30_db": { - "type": "string" + "pdb_mmcif": { + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" }, - "bfd_first_non_consensus_sequences": { - "type": "string" + "pdb_obsolete": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" }, - 
"uniprot_fasta": { - "type": "string" + "uniref30": { + "type": "string", + "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" }, - "pdb_seqres_txt": { - "type": "string" + "uniref90": { + "type": "string", + "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" }, - "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt": { - "type": "string" + "pdb_seqres": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" }, - "uniref90_fasta": { - "type": "string" + "uniprot_sprot": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" }, - "mgy_clusters_fasta": { - "type": "string" + "uniprot_trembl": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" }, "bfd_name": { "type": "string", @@ -813,79 +823,6 @@ "type": "string", "default": "mgy_clusters_2022_05.fa" }, - "rosettafold_all_atom_db": { - "type": "string" - }, - "uniref30_rosettafold_all_atom_path": { - "type": "string" - }, - "blast_path": { - "type": "string", - "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" - }, - "pdb100_path": { - "type": "string", - "default": "null/pdb100/" - }, - "RFAA_paper_weights_path": { - "type": "string" - }, - "cpuQueue": { - "type": "string", - "default": "submission" - }, - "gpuQueue": { - "type": "string", - "default": "mwacgpu2" - }, - "bfd": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" - }, - "small_bfd": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" - }, - "alphafold2_params": { - "type": "string", - "default": 
"https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" - }, - "mgnify": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" - }, - "pdb70": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" - }, - "pdb_mmcif": { - "type": "string", - "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" - }, - "pdb_obsolete": { - "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" - }, - "uniref30": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" - }, - "uniref90": { - "type": "string", - "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" - }, - "pdb_seqres": { - "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" - }, - "uniprot_sprot": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" - }, - "uniprot_trembl": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" - }, "bfd_dir_path": { "type": "string", "default": "null/bfd/" @@ -937,21 +874,6 @@ "pdb100": { "type": "string", "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" - }, - "RFAA_paper_weights": { - "type": "string", - "default": "http://files.ipd.uw.edu/pub/RF-All-Atom/weights/RFAA_paper_weights.pt" - }, - "uniref30_variable": { - "type": "string", - "default": "null/uniref30/" - }, - "bfd_variable": { - "type": "string", - "default": "null/bfd/" - }, - "RFAA_paper_weights_variable": { - "type": "string" } } } From 8379c502424b53190117021ff1ecbeaddb911d7c Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 1 Nov 2024 17:19:37 +1100 Subject: 
[PATCH 076/135] Fixed db conflicts --- conf/dbs.config | 3 +-- main.nf | 2 +- subworkflows/local/prepare_rosettafold_all_atom_dbs.nf | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 6b4f6ddb..9ab25ea8 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -55,8 +55,7 @@ params { // RoseTTAFold paths uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/uniref30/UniRef30_2020_06/*" pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" - blast_path = "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" - bfd_path = "${params.rosettafold_all_atom_db}/bfd/*" + bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/main.nf b/main.nf index cf63dcb0..b09d5c45 100644 --- a/main.nf +++ b/main.nf @@ -214,7 +214,7 @@ workflow NFCORE_PROTEINFOLD { // SUBWORKFLOW: Prepare Rosttafold-all-atom DBs // PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( - params.bfd_path, + params.bfd_rosettafold_all_atom_path, params.uniref30_rosettafold_all_atom_path, params.pdb100_path ) diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf index 23a844e8..e03de5c1 100644 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf @@ -6,12 +6,12 @@ workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { take: - bfd_path // directory: /path/to/bfd/ + bfd_rosettafold_all_atom_path // directory: /path/to/bfd/ uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ pdb100_path main: - ch_bfd = Channel.value(file(bfd_path)) + ch_bfd = Channel.value(file(bfd_rosettafold_all_atom_path)) ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path)) ch_pdb100 = Channel.value(file(pdb100_path)) ch_versions = Channel.empty() From 
7c9cf195b5265cde61399715a7e9456730607a48 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 4 Nov 2024 15:07:07 +1100 Subject: [PATCH 077/135] Troubleshooting benchmarks and having jobs queued and run by nextflow --- conf/katana.config | 77 +++++++++++------------ modules/local/run_alphafold2_pred.nf | 1 - modules/local/run_rosettafold_all_atom.nf | 2 +- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index fcfe018e..c12d79c6 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -1,62 +1,52 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - nf-core/proteinfold Nextflow base config file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - A 'blank slate' config file, appropriate for general use on most high performance - compute environments. Assumes that all software is installed and available on - the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. ----------------------------------------------------------------------------------------- -*/ +// UNSW Katana nf-core configuration profile + + params { - cpuQueue = 'submission' - gpuQueue = 'mwacgpu2' + config_profile_description = 'UNSW Katana HPC profile provided by nf-core/configs' + config_profile_contact = '@jscgh' + config_profile_url = 'https://docs.restech.unsw.edu.au/' } process { + queue = 'submission' - // TODO nf-core: Check the defaults for all processes - cpus = { 1 * task.attempt } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } + resourceLimits = [ + memory: 500.GB, + cpus: 32, + time: 200.h + ] - //executor = 'pbspro' + executor = 'pbspro' errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. 
- // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { 1 } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { 2 * task.attempt } - memory = { 12.GB * task.attempt } - time = { 4.h * task.attempt } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { 6 * task.attempt } - memory = { 36.GB * task.attempt } - time = { 8.h * task.attempt } + cpus = { 8 * task.attempt } + memory = { 125.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { 12 * task.attempt } - memory = { 72.GB * task.attempt } - time = { 16.h * task.attempt } + cpus = { 16 * task.attempt } + memory = { 250.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { 20.h * task.attempt } + time = { 24.h * task.attempt } } withLabel:process_high_memory { - memory = { 200.GB * task.attempt } + memory = { 250.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' @@ -69,12 +59,17 @@ process { cache = false } withLabel:gpu_compute { - queue = "${params.gpuQueue}" accelerator = 1 - clusterOptions = { "-l select=1:ngpus=1:ncpus=${task.cpus}:mem=${task.memory.toMega()}mb" } - + clusterOptions = { "-lhost=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } containerOptions = { - workflow.containerEngine == "singularity" ? '--nv' : ( workflow.containerEngine == "docker" ? '--gpus all' : none ) + workflow.containerEngine == "singularity" ? 
'--nv --env CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES' : ( workflow.containerEngine == "docker" ? '--gpus all' : null ) } } + + singularity { + enabled = true + autoMounts = true + runOptions = "-B /mnt,/srv" + } + } diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred.nf index ca94dda3..bdf3ec92 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred.nf @@ -4,7 +4,6 @@ process RUN_ALPHAFOLD2_PRED { tag "$meta.id" label 'process_medium' - label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index b70bd34e..d7625d07 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -3,7 +3,7 @@ */ process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" - label 'process_medium' + label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { From 7dd2e457bae8c14166b5a05cb3a55a5ee072d244 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 4 Nov 2024 15:52:57 +1100 Subject: [PATCH 078/135] Removed leftover blast-2.2.6 references --- conf/base.config | 6 ------ conf/katana.config | 23 +++++++++++------------ nextflow.config | 4 ++-- nextflow_schema.json | 4 ---- 4 files changed, 13 insertions(+), 24 deletions(-) diff --git a/conf/base.config b/conf/base.config index 247f62c2..7e7a42dd 100644 --- a/conf/base.config +++ b/conf/base.config @@ -7,10 +7,6 @@ the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. 
---------------------------------------------------------------------------------------- */ -params { - cpuQueue = 'submission' - gpuQueue = 'mwacgpu2' -} process { @@ -19,8 +15,6 @@ process { memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } - //executor = 'pbspro' - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/conf/katana.config b/conf/katana.config index c12d79c6..924bec03 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -16,7 +16,10 @@ process { time: 200.h ] - executor = 'pbspro' + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 @@ -55,21 +58,17 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } withLabel:gpu_compute { accelerator = 1 clusterOptions = { "-lhost=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } - containerOptions = { - workflow.containerEngine == "singularity" ? '--nv --env CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES' : ( workflow.containerEngine == "docker" ? 
'--gpus all' : null ) - } - } - + } singularity { enabled = true autoMounts = true - runOptions = "-B /mnt,/srv" - } - + runOptions = "-B /mnt,/srv,${TMPDIR}:/tmp" + } + apptainer { + enabled = true + runOptions = "-B /mnt,/srv,${TMPDIR}:/tmp" + } } diff --git a/nextflow.config b/nextflow.config index fc0b1172..63fd619a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -82,9 +82,8 @@ params { // Rosettafold-all-atom parameters rosettafold_all_atom_db = null uniref30_rosettafold_all_atom_path = null - blast_path = null pdb100_path = null - RFAA_paper_weights_path = null + bfd_rosettafold_all_atom_path = null // Foldseek params foldseek_search = null @@ -256,6 +255,7 @@ profiles { test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } + katana { includeConfig 'conf/katana.config' } } // Load nf-core custom profiles from different Institutions diff --git a/nextflow_schema.json b/nextflow_schema.json index 203e6c8c..47426c15 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -680,10 +680,6 @@ "type": "string", "default": "null/uniref30/UniRef30_2020_06/*" }, - "blast_path": { - "type": "string", - "default": "/srv/scratch/z5378336/apptainers/blast-2.2.26/data" - }, "pdb100_path": { "type": "string", "default": "null/pdb100_2021Mar03/*" From a7aa7eb5d2b7a5446bbde26c43aca1b5865d781b Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 4 Nov 2024 16:04:22 +1100 Subject: [PATCH 079/135] Updated nextflow_schema --- nextflow_schema.json | 200 ++----------------------------------------- 1 file changed, 9 insertions(+), 191 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 47426c15..5000582a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -189,8 +189,7 @@ "type": "string", "description": "Specifies whether is a 'monomer' or 
'multimer' prediction", "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream", - "default": "monomer" + "fa_icon": "fas fa-stream" } } }, @@ -386,32 +385,27 @@ "small_bfd_path": { "type": "string", "description": "Path to a reduced version of the BFD database", - "fa_icon": "fas fa-folder-open", - "default": "null/smallbfd/*" + "fa_icon": "fas fa-folder-open" }, "alphafold2_params_path": { "type": "string", "description": "Path to the Alphafold2 parameters", - "fa_icon": "fas fa-folder-open", - "default": "null/params/alphafold_params_*/*" + "fa_icon": "fas fa-folder-open" }, "mgnify_path": { "type": "string", "description": "Path to the MGnify database", - "fa_icon": "fas fa-folder-open", - "default": "null/mgnify/*" + "fa_icon": "fas fa-folder-open" }, "pdb70_path": { "type": "string", "description": "Path to the PDB70 database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb70/**" + "fa_icon": "fas fa-folder-open" }, "pdb_mmcif_path": { "type": "string", "description": "Path to the PDB mmCIF database", - "fa_icon": "fas fa-folder-open", - "default": "null/pdb_mmcif/**" + "fa_icon": "fas fa-folder-open" }, "uniref30_alphafold2_path": { "type": "string", @@ -484,8 +478,7 @@ "colabfold_alphafold2_params_tags": { "type": "object", "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream", - "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" + "fa_icon": "fas fa-stream" } } }, @@ -684,189 +677,14 @@ "type": "string", "default": "null/pdb100_2021Mar03/*" }, - "RFAA_paper_weights_path": { - "type": "string" - }, - "cpuQueue": { - "type": "string", - "default": "submission" - }, - "gpuQueue": { - "type": "string", - "default": "mwacgpu2" - }, - "bfd": { - "type": "string", - "default": 
"https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" - }, - "small_bfd": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" - }, - "alphafold2_params": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold/alphafold_params_2022-03-02.tar" - }, - "mgnify": { - "type": "string", - "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" - }, - "pdb70": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200916.tar.gz" - }, - "pdb_mmcif": { - "type": "string", - "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" - }, - "pdb_obsolete": { + "bfd_rosettafold_all_atom_path": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" + "default": "null/bfd/*" }, "uniref30": { "type": "string", "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" }, - "uniref90": { - "type": "string", - "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" - }, - "pdb_seqres": { - "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" - }, - "uniprot_sprot": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" - }, - "uniprot_trembl": { - "type": "string", - "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" - }, - "bfd_name": { - "type": "string", - "default": "bfd" - }, - "smallbfd_name": { - "type": "string", - "default": "smallbfd" - }, - "mgnify_name": { - "type": "string", - "default": "mgnify" - }, - "pdb70_name": { - "type": "string", - "default": "pdb70" - 
}, - "pdb_mmcif_name": { - "type": "string", - "default": "pdb_mmcif" - }, - "uniref30_name": { - "type": "string", - "default": "uniref30" - }, - "uniref90_name": { - "type": "string", - "default": "uniref90" - }, - "pdb_seqres_name": { - "type": "string", - "default": "pdb_seqres" - }, - "uniprot_name": { - "type": "string", - "default": "uniprot" - }, - "alphafold_params_name": { - "type": "string", - "default": "params/alphafold_params_*" - }, - "mmcif_files_name": { - "type": "string", - "default": "pdb_mmcif/mmcif_files/" - }, - "mmcif_obsolete_name": { - "type": "string", - "default": "pdb_mmcif/obsolete.dat" - }, - "uniref30_db_name": { - "type": "string", - "default": "uniref30_2018_08" - }, - "bfd_first_non_consensus_sequences_name": { - "type": "string", - "default": "bfd-first_non_consensus_sequences.fasta" - }, - "uniprot_fasta_name": { - "type": "string", - "default": "uniprot.fasta" - }, - "pdb_seqres_txt_name": { - "type": "string", - "default": "pdb_seqres.txt" - }, - "bfd_metaclust_clu_complete_id30_c90_final_seq_sorted_opt_name": { - "type": "string", - "default": "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" - }, - "uniref90_fasta_name": { - "type": "string", - "default": "uniref90.fasta" - }, - "mgy_clusters_fasta_name": { - "type": "string", - "default": "mgy_clusters_2022_05.fa" - }, - "bfd_dir_path": { - "type": "string", - "default": "null/bfd/" - }, - "small_bfd_dir_path": { - "type": "string", - "default": "null/smallbfd/" - }, - "mgnify_dir_path": { - "type": "string", - "default": "null/mgnify/" - }, - "pdb70_dir_path": { - "type": "string", - "default": "null/pdb70/" - }, - "pdb_mmcif_dir_path": { - "type": "string", - "default": "null/pdb_mmcif/" - }, - "uniref30_dir_path": { - "type": "string", - "default": "null/uniref30/" - }, - "uniref90_dir_path": { - "type": "string", - "default": "null/uniref90/" - }, - "pdb_seqres_dir_path": { - "type": "string", - "default": "null/pdb_seqres/" - }, - "uniprot_dir_path": { - 
"type": "string", - "default": "null/uniprot/" - }, - "mgnify_database_path": { - "type": "string", - "default": "null/mgnify/" - }, - "template_mmcif_dir": { - "type": "string", - "default": "null/pdb_mmcif/mmcif_files//" - }, - "obsolete_pdbs_path": { - "type": "string", - "default": "null/pdb_mmcif/obsolete.dat" - }, "pdb100": { "type": "string", "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" From cbb7841f62c45470e88d1be670ae3ec63d10df14 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 4 Nov 2024 16:56:38 +1100 Subject: [PATCH 080/135] Katana HPC gpu compute option --- conf/katana.config | 11 +---------- modules/local/run_rosettafold_all_atom.nf | 1 + nextflow.config | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 924bec03..720d53e3 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -60,15 +60,6 @@ process { } withLabel:gpu_compute { accelerator = 1 - clusterOptions = { "-lhost=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } + clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } } - singularity { - enabled = true - autoMounts = true - runOptions = "-B /mnt,/srv,${TMPDIR}:/tmp" - } - apptainer { - enabled = true - runOptions = "-B /mnt,/srv,${TMPDIR}:/tmp" - } } diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index d7625d07..2d66c885 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -3,6 +3,7 @@ */ process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" + label 'process_medium' label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba diff --git a/nextflow.config b/nextflow.config index 63fd619a..5641a1c0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -239,6 +239,7 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } + katana { includeConfig 'conf/katana.config' } test { includeConfig 'conf/test.config' } 
test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } @@ -255,7 +256,6 @@ profiles { test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } - katana { includeConfig 'conf/katana.config' } } // Load nf-core custom profiles from different Institutions From 43f73647d9b0d7a7d92e5b83ee1b04bbeb818081 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 10:41:24 +1100 Subject: [PATCH 081/135] Fixing crashes caused by HPC not being able to reach the online custom configs --- nextflow.config | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5641a1c0..97900704 100644 --- a/nextflow.config +++ b/nextflow.config @@ -259,10 +259,18 @@ profiles { } // Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} // Load nf-core/proteinfold custom profiles from different institutions. -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" +try { + includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") +} // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled From e96e175de316f1f2e2274a4291a8462b49b2b130 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 10:49:02 +1100 Subject: [PATCH 082/135] Ran nf-core linter --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/linting_comment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8dc3e6a4..992c391e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -17,7 +17,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/prot - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/proteinfold/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/proteinfold _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. -- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Make sure your code lints (`nf-core pipelines lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 40acc23f..42e519bf 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 with: workflow: linting.yml workflow_conclusion: completed From 0c173e58e49029eb2e59ce213f30115ad3229c2e Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 10:57:50 +1100 Subject: [PATCH 083/135] deleted: .github/workflows/linting_comment.yml --- .github/workflows/linting_comment.yml | 28 --------------------------- 1 file changed, 28 deletions(-) delete mode 100644 .github/workflows/linting_comment.yml diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml deleted file mode 100644 index 42e519bf..00000000 --- a/.github/workflows/linting_comment.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: nf-core linting comment -# This workflow is triggered after the linting action is complete -# It posts an automated comment to the PR, even if the PR is coming from a fork - -on: - workflow_run: - workflows: ["nf-core linting"] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - name: Download lint results - uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 - with: - workflow: linting.yml - workflow_conclusion: completed - - - name: Get PR number - id: pr_number - run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - - - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 - with: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - number: ${{ steps.pr_number.outputs.pr_number }} - path: linting-logs/lint_results.md From b58be9f0ef4feb9dbd3f0ccbb13b7dc09e17ea23 Mon 
Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 10:59:34 +1100 Subject: [PATCH 084/135] Linting files --- .github/workflows/linting_comment.yml | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/linting_comment.yml diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 00000000..0bbcd30f --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@v2 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md From 11a2d9fd54ab209c1cb4872ca89bd31ff05e7e05 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 12:06:40 +1100 Subject: [PATCH 085/135] Genericised pdb emission --- modules/local/run_rosettafold_all_atom.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 2d66c885..0470546f 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -21,7 +21,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { output: path ("${fasta.baseName}*") - tuple val(meta), path ("${fasta.baseName}*pdb"), emit: pdb + tuple val(meta), path ("*pdb"), emit: pdb tuple val(meta), path ("*_mqc.tsv"), emit: multiqc path 
"versions.yml", emit: versions From 5820f2969ae7a759c314ec12ab1ddb55020e2f6f Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 5 Nov 2024 14:58:07 +1100 Subject: [PATCH 086/135] Properly calls the HF3 container but cannot run through process yet --- assets/schema_input.json | 4 +- conf/dbs.config | 10 ++ main.nf | 46 +++++++- modules/local/run_helixfold3.nf | 73 +++++-------- nextflow.config | 2 +- nextflow_schema.json | 24 +++++ subworkflows/local/prepare_helixfold3_dbs.nf | 22 ++++ workflows/helixfold3.nf | 105 +++++++++++++++++++ 8 files changed, 234 insertions(+), 52 deletions(-) create mode 100644 subworkflows/local/prepare_helixfold3_dbs.nf create mode 100644 workflows/helixfold3.nf diff --git a/assets/schema_input.json b/assets/schema_input.json index 49e61a92..c61e2108 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,8 +17,8 @@ "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.(fa(sta)?|yaml|yml)$", - "errorMessage": "Fasta or yaml file must be provided, cannot contain spaces and must have extension '.fa', '.fasta', '.yaml' or '.yml'" + "pattern": "^\\S+\\.(fa(sta)?|yaml|yml|json)$", + "errorMessage": "Fasta, yaml or json file must be provided, cannot contain spaces and must have extension '.fa', '.fasta', '.yaml', '.yml', or '.json'" } }, "required": ["sequence", "fasta"] diff --git a/conf/dbs.config b/conf/dbs.config index 9ab25ea8..fbfc7447 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -57,6 +57,16 @@ params { pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" + // Helixfold3 links + uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' + ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' + rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' + + // 
Helixfold3 paths + uniclust30_path = "${params.helixfold3_db}/uniclust30/*" + ccd_preprocessed_path = "${params.helixfold3_db}/*" + rfam_path = "${params.helixfold3_db}/*" + // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' esm2_t36_3B_UR50D = 'https://dl.fbaipublicfiles.com/fair-esm/models/esm2_t36_3B_UR50D.pt' diff --git a/main.nf b/main.nf index b09d5c45..1a8c188b 100644 --- a/main.nf +++ b/main.nf @@ -28,8 +28,12 @@ if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { ESMFOLD } from './workflows/esmfold' } if (params.mode == "rosettafold_all_atom") { - include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' - include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' + include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' + include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' +} +if (params.mode == "helixfold3") { + include { PREPARE_HELIXFOLD3_DBS } from './subworkflows/local/prepare_helixfold3_dbs' + include { HELIXFOLD3 } from './workflows/helixfold3' } include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -211,7 +215,7 @@ workflow NFCORE_PROTEINFOLD { // if(params.mode == "rosettafold_all_atom") { // - // SUBWORKFLOW: Prepare Rosttafold-all-atom DBs + // SUBWORKFLOW: Prepare Rosettafold-all-atom DBs // PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( params.bfd_rosettafold_all_atom_path, @@ -230,10 +234,44 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 ) -// ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report + ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) } + + // + // WORKFLOW: Run helixfold3 + // + if(params.mode == "helixfold3") { + // + // SUBWORKFLOW: Prepare helixfold3
DBs + // + PREPARE_HELIXFOLD3_DBS ( + params.uniclust30_path, + params.ccd_preprocessed_path, + params.rfam_path, + params.uniclust30_path, + params.ccd_preprocessed_path, + params.rfam_path + ) + ch_versions = ch_versions.mix(PREPARE_HELIXFOLD3_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/helixfold3 workflow + // + HELIXFOLD3 ( + ch_samplesheet, + ch_versions, + PREPARE_HELIXFOLD3_DBS.out.uniclust30, + PREPARE_HELIXFOLD3_DBS.out.ccd_preprocessed, + PREPARE_HELIXFOLD3_DBS.out.rfam + ) + ch_multiqc = HELIXFOLD3.out.multiqc_report + ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + } + + + // // // POST PROCESSING: generate visulaisation reports // diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 58d72cd2..31855ffe 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -4,44 +4,36 @@ process RUN_HELIXFOLD3 { tag "$meta.id" label 'process_medium' + label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error("Local RUN_HELIXFOLD3 module does not support Conda. 
Please use Docker / Singularity / Podman / Apptainer instead.") } - container "helixfold3.sif" + container "/srv/scratch/sbf/apptainers/hf3_step/hf3_step.sif" input: tuple val(meta), path(fasta) - val db_preset - path ('params/*') - path ('bfd/*') - path ('small_bfd/*') - path ('mgnify/*') - path ('rfam/*') - path ('pdb_mmcif/*') - path ('uniclust30/*') - path ('uniref90/*') - path ('pdb_seqres/*') - path ('uniprot/*') - path ('ccd/*') output: path ("${fasta.baseName}*") - path "*_mqc.tsv", emit: multiqc - path "versions.yaml", emit: versions + tuple val(meta), path ("*pdb"), emit: pdb + tuple val(meta), path ("*_mqc.tsv"), emit: multiqc + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + + def MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" + def PATH="$MAXIT_SRC/bin:opt/miniforge/envs/helixfold/bin:$PATH" + def RCSBROOT="${MAXIT_SRC}" + def OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" - export PATH="/opt/miniforge/envs/helixfold/bin:$PATH" - export PATH="$MAXIT_SRC/bin:$PATH" - export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" - export RCSBROOT=$MAXIT_SRC + """ + ln -s /srv/scratch/sbf/apptainers/PaddleHelix/apps/protein_folding/helixfold3/* . 
CUDA_VISIBLE_DEVICES=0 /opt/miniforge/envs/helixfold/bin/python3.9 inference.py \ --maxit_binary "${MAXIT_SRC}/bin/maxit" \ @@ -51,35 +43,26 @@ process RUN_HELIXFOLD3 { --kalign_binary_path "/opt/miniforge/envs/helixfold/bin/kalign" \ --hmmsearch_binary_path "/opt/miniforge/envs/helixfold/bin/hmmsearch" \ --hmmbuild_binary_path "/opt/miniforge/envs/helixfold/bin/hmmbuild" \ - --preset='${db_preset}' \ - --bfd_database_path="${params.alphafold2_db}bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ - --small_bfd_database_path="${params.alphafold2_db}/g/bfd-first_non_consensus_sequences.fasta" \ - --uniclust30_database_path="${params.alphafold2_db}/g/uniclust30/uniclust30_2018_08" \ - --uniprot_database_path="${params.alphafold2_db}uniprot/uniprot.fasta" \ - --pdb_seqres_database_path="${params.alphafold2_db}pdb_seqres/pdb_seqres.txt" \ - --rfam_database_path="${params.alphafold2_db}/g/Rfam-14.9_rep_seq.fasta" \ - --template_mmcif_dir="${params.alphafold2_db}pdb_mmcif/mmcif_files" \ - --obsolete_pdbs_path="${params.alphafold2_db}pdb_mmcif/obsolete.dat" \ - --ccd_preprocessed_path="${params.alphafold2_db}/g/ccd_preprocessed_etkdg.pkl.gz" \ + --preset='reduced_dbs' \ + --bfd_database_path="${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ + --small_bfd_database_path="${params.helixfold3_db}/bfd-first_non_consensus_sequences.fasta" \ + --uniclust30_database_path="${params.helixfold3_db}/uniclust30/uniclust30_2018_08" \ + --uniprot_database_path="${params.alphafold2_db}/uniprot/uniprot.fasta" \ + --pdb_seqres_database_path="${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" \ + --rfam_database_path="${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" \ + --template_mmcif_dir="${params.alphafold2_db}/pdb_mmcif/mmcif_files" \ + --obsolete_pdbs_path="${params.alphafold2_db}/pdb_mmcif/obsolete.dat" \ + --ccd_preprocessed_path="${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" \ + --uniref90_database_path 
"${params.helixfold3_db}/uniref90/uniref90.fasta" \ + --mgnify_database_path "${params.helixfold3_db}/mgnify/mgy_clusters_2018_12.fa" \ --max_template_date=2024-08-14 \ - --input_mnt="$fasta" \ + --input_json="${fasta}" \ --output_dir="\$PWD" \ --model_name allatom_demo \ --init_model init_models/HelixFold3-240814.pdparams \ --infer_times 5 \ --precision "bf16" - cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.pdb ./"${fasta.baseName}".helixfold.pdb - cd "${fasta.baseName}" - awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank1/predicted_structure.pdb | uniq > ranked_0_plddt.tsv - for i in 1 2 3 4 - do awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank\$i/predicted_structure.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv - done - paste ranked_0_plddt.tsv ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv - echo -e Positions"\\t"rank_0"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv - cat header.tsv plddt.tsv > ../"${fasta.baseName}"_plddt_mqc.tsv - cd .. 
- cp ${fasta.baseName}* ./ cat <<-END_VERSIONS > versions.yaml "${task.process}": @@ -89,12 +72,12 @@ process RUN_HELIXFOLD3 { stub: """ - touch ./"${fasta.baseName}".alphafold.pdb + touch ./"${fasta.baseName}".helixfold3.pdb touch ./"${fasta.baseName}"_mqc.tsv - cat <<-END_VERSIONS > versions.yaml + cat <<-END_VERSIONS > versions.yml "${task.process}": - awk: \$(gawk --version| head -1 | sed 's/GNU Awk //; s/, API:.*//') + python: \$(python3 --version | sed 's/Python //g') END_VERSIONS """ } diff --git a/nextflow.config b/nextflow.config index 97900704..5912dab2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ params { // Input options input = null - mode = 'alphafold2' // {alphafold2, colabfold, esmfold, rosettafold_all_atom} + mode = 'alphafold2' // {alphafold2, colabfold, esmfold, rosettafold_all_atom, helixfold3} use_gpu = false // Alphafold2 parameters diff --git a/nextflow_schema.json b/nextflow_schema.json index 5000582a..519390d3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -688,6 +688,30 @@ "pdb100": { "type": "string", "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" + }, + "uniclust30_link": { + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz" + }, + "ccd_preprocessed_link": { + "type": "string", + "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz" + }, + "rfam_link": { + "type": "string", + "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta" + }, + "uniclust30_path": { + "type": "string", + "default": "{params.helixfold3_db}/uniclust30/*" + }, + "ccd_preprocessed_path": { + "type": "string", + "default": "{params.helixfold3_db}/*" + }, + "rfam_path": { + "type": "string", + "default": "{params.helixfold3_db}/*" } } } diff --git a/subworkflows/local/prepare_helixfold3_dbs.nf b/subworkflows/local/prepare_helixfold3_dbs.nf new file 
mode 100644 index 00000000..b96771cc --- /dev/null +++ b/subworkflows/local/prepare_helixfold3_dbs.nf @@ -0,0 +1,22 @@ +workflow PREPARE_HELIXFOLD3_DBS { + + take: + uniclust30_path + ccd_preprocessed_path + rfam_path + uniclust30_link + ccd_preprocessed_link + rfam_link + + main: + ch_uniclust30 = Channel.value(file(uniclust30_path)) + ch_ccd_preprocessed = Channel.value(file(ccd_preprocessed_path)) + ch_rfam = Channel.value(file(rfam_path)) + ch_versions = Channel.empty() + + emit: + uniclust30 = ch_uniclust30 + ccd_preprocessed = ch_ccd_preprocessed + rfam = ch_rfam + versions = ch_versions +} diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf new file mode 100644 index 00000000..a05cdc9f --- /dev/null +++ b/workflows/helixfold3.nf @@ -0,0 +1,105 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { RUN_HELIXFOLD3 } from '../modules/local/run_helixfold3' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { MULTIQC } from '../modules/nf-core/multiqc/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow HELIXFOLD3 { + + take: + ch_samplesheet + ch_versions // channel: [ path(versions.yml) ] + ch_uniclust30 + ch_ccd_preprocessed + ch_rfam + + main: + ch_multiqc_files = Channel.empty() + + // + // SUBWORKFLOW: Run helixfold3 + // + RUN_HELIXFOLD3 ( + ch_samplesheet + ) + ch_multiqc_rep = RUN_HELIXFOLD3.out.multiqc.collect() + ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } + + // + // MODULE: MultiQC + // + ch_multiqc_report = Channel.empty() + if (!params.skip_multiqc) { + ch_multiqc_report = Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = Channel.empty() + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) + ch_multiqc_report = MULTIQC.out.report.toList() + } + + emit: + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ From 1033e365759364752cad7011e527e2c5e11a8358 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 6 Nov 2024 13:00:56 +1100 Subject: [PATCH 087/135] Updated apptainer image paths --- modules/local/run_helixfold3.nf | 2 +- modules/local/run_rosettafold_all_atom.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 31855ffe..5dd6863d 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -11,7 +11,7 @@ process RUN_HELIXFOLD3 { error("Local RUN_HELIXFOLD3 module does not support Conda. 
Please use Docker / Singularity / Podman / Apptainer instead.") } - container "/srv/scratch/sbf/apptainers/hf3_step/hf3_step.sif" + container "/srv/scratch/sbf/apptainers/helixfold3.sif" input: tuple val(meta), path(fasta) diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 0470546f..c45d2351 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -11,7 +11,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "/srv/scratch/z5378336/RoseTTAFold_All_Atom.sif" + container "/srv/scratch/sbf/apptainers/RoseTTAFold_All_Atom.sif" input: tuple val(meta), path(fasta) From b444b6597a2d255b588b42c0fa45b505614228e3 Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 6 Nov 2024 16:59:44 +1100 Subject: [PATCH 088/135] HF3 is now able to start a run --- conf/katana.config | 9 ++++++--- modules/local/run_helixfold3.nf | 10 +++++----- nextflow.config | 1 + 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 720d53e3..44e56e99 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -36,8 +36,8 @@ process { time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { 8 * task.attempt } - memory = { 125.GB * task.attempt } + cpus = { 6 * task.attempt } + memory = { 46.GB * task.attempt } time = { 8.h * task.attempt } } withLabel:process_high { @@ -61,5 +61,8 @@ process { withLabel:gpu_compute { accelerator = 1 clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } - } + containerOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env CUDA_VISIBLE_DEVICES=0" + } + apptainer.runOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env CUDA_VISIBLE_DEVICES=0" + singularity.runOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env CUDA_VISIBLE_DEVICES=0" } 
diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 5dd6863d..098fb564 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -26,16 +26,16 @@ process RUN_HELIXFOLD3 { task.ext.when == null || task.ext.when script: - def MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" def PATH="$MAXIT_SRC/bin:opt/miniforge/envs/helixfold/bin:$PATH" def RCSBROOT="${MAXIT_SRC}" def OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" - + def CUDA_VISIBLE_DEVICES=0 + """ ln -s /srv/scratch/sbf/apptainers/PaddleHelix/apps/protein_folding/helixfold3/* . - CUDA_VISIBLE_DEVICES=0 /opt/miniforge/envs/helixfold/bin/python3.9 inference.py \ + /opt/miniforge/envs/helixfold/bin/python3.9 inference.py \ --maxit_binary "${MAXIT_SRC}/bin/maxit" \ --jackhmmer_binary_path "/opt/miniforge/envs/helixfold/bin/jackhmmer" \ --hhblits_binary_path "/opt/miniforge/envs/helixfold/bin/hhblits" \ @@ -60,11 +60,11 @@ process RUN_HELIXFOLD3 { --output_dir="\$PWD" \ --model_name allatom_demo \ --init_model init_models/HelixFold3-240814.pdparams \ - --infer_times 5 \ + --infer_times 1 \ --precision "bf16" - cat <<-END_VERSIONS > versions.yaml + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python3 --version | sed 's/Python //g') END_VERSIONS diff --git a/nextflow.config b/nextflow.config index 5912dab2..07d62cf3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -220,6 +220,7 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true + if (params.use_gpu) { apptainer.runOptions = '--nv' } conda.enabled = false docker.enabled = false singularity.enabled = false From 456b9e5c0196780239a51a50e3b2c6ed24f2492b Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 6 Nov 2024 21:32:48 +1100 Subject: [PATCH 089/135] First working version of HF3 See https://github.com/Australian-Structural-Biology-Computing/proteinfold/issues/18 Related to https://github.com/nf-core/proteinfold/issues/211 --- conf/dbs.config | 14 
++++++++------ modules/local/run_helixfold3.nf | 17 ++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index fbfc7447..fbbcfd13 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -58,14 +58,16 @@ params { bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" // Helixfold3 links - uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' - ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' - rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' + uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' + ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' + rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' + helixfold3_init_models_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip' // Helixfold3 paths - uniclust30_path = "{params.helixfold3_db}/uniclust30/*" - ccd_preprocessed_path = "{params.helixfold3_db}/*" - rfam_path = "{params.helixfold3_db}/*" + uniclust30_path = "${params.helixfold3_db}/uniclust30/*" + ccd_preprocessed_path = "${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" + rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" + helixfold3_init_models_path = "${params.helixfold3_db}" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 098fb564..58bf7cf4 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -26,17 +26,16 @@ process RUN_HELIXFOLD3 { task.ext.when == null || task.ext.when script: - def MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" - 
def PATH="$MAXIT_SRC/bin:opt/miniforge/envs/helixfold/bin:$PATH" - def RCSBROOT="${MAXIT_SRC}" - def OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" - def CUDA_VISIBLE_DEVICES=0 - """ - ln -s /srv/scratch/sbf/apptainers/PaddleHelix/apps/protein_folding/helixfold3/* . + export MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" + export RCSBROOT="\$MAXIT_SRC" + export PATH="\$MAXIT_SRC/bin:opt/miniforge/envs/helixfold/bin:$PATH" + export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" + + ln -s /app/helixfold3/* . /opt/miniforge/envs/helixfold/bin/python3.9 inference.py \ - --maxit_binary "${MAXIT_SRC}/bin/maxit" \ + --maxit_binary "\$MAXIT_SRC/bin/maxit" \ --jackhmmer_binary_path "/opt/miniforge/envs/helixfold/bin/jackhmmer" \ --hhblits_binary_path "/opt/miniforge/envs/helixfold/bin/hhblits" \ --hhsearch_binary_path "/opt/miniforge/envs/helixfold/bin/hhsearch" \ @@ -59,7 +58,7 @@ process RUN_HELIXFOLD3 { --input_json="${fasta}" \ --output_dir="\$PWD" \ --model_name allatom_demo \ - --init_model init_models/HelixFold3-240814.pdparams \ + --init_model "${params.helixfold3_init_models_path}/HelixFold3-240814.pdparams" \ --infer_times 1 \ --precision "bf16" From 87c06062e24fecfad82b6148a59aaad705ab492d Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 11:22:04 +1100 Subject: [PATCH 090/135] Schema updates --- modules/local/run_helixfold3.nf | 4 +++- nextflow_schema.json | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 58bf7cf4..24d562bd 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -18,6 +18,7 @@ process RUN_HELIXFOLD3 { output: path ("${fasta.baseName}*") + tuple val(meta), path ("${meta.id}_helixfold3.pdb"), emit: main_pdb tuple val(meta), path ("*pdb"), emit: pdb tuple val(meta), path ("*_mqc.tsv"), emit: multiqc path "versions.yml", emit: versions @@ -29,7 +30,7 @@ process RUN_HELIXFOLD3 { """ export 
MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" export RCSBROOT="\$MAXIT_SRC" - export PATH="\$MAXIT_SRC/bin:opt/miniforge/envs/helixfold/bin:$PATH" + export PATH="\$MAXIT_SRC/bin:/opt/miniforge/envs/helixfold/bin:$PATH" export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" ln -s /app/helixfold3/* . @@ -60,6 +61,7 @@ process RUN_HELIXFOLD3 { --model_name allatom_demo \ --init_model "${params.helixfold3_init_models_path}/HelixFold3-240814.pdparams" \ --infer_times 1 \ + --diff_batch_size 1 \ --precision "bf16" diff --git a/nextflow_schema.json b/nextflow_schema.json index 519390d3..807663f2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -712,6 +712,14 @@ "rfam_path": { "type": "string", "default": "{params.helixfold3_db}/*" + }, + "helixfold3_init_models_link": { + "type": "string", + "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" + }, + "helixfold3_init_models_path": { + "type": "string", + "default": "$helixfold3_db" } } } From 1c95a557dcd9dce0f961ce24fa715c0bd0b40e75 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 11:32:29 +1100 Subject: [PATCH 091/135] Fixing config lines --- nextflow.config | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/nextflow.config b/nextflow.config index 07d62cf3..288b0607 100644 --- a/nextflow.config +++ b/nextflow.config @@ -220,7 +220,6 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true - if (params.use_gpu) { apptainer.runOptions = '--nv' } conda.enabled = false docker.enabled = false singularity.enabled = false @@ -260,18 +259,10 @@ profiles { } // Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +includeConfig !System.getenv('NXF_OFFLINE') && 
params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/proteinfold custom profiles from different institutions. -try { - includeConfig "${params.custom_config_base}/pipeline/proteinfold.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/proteinfold profiles: ${params.custom_config_base}/pipeline/proteinfold.config") -} +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -387,6 +378,9 @@ if (params.mode.toLowerCase().split(",").contains("esmfold")) { if (params.mode.toLowerCase().split(",").contains("rosettafold_all_atom")) { includeConfig 'conf/modules_rosettafold_all_atom.config' } +if (params.mode.toLowerCase().split(",").contains("helixfold3")) { + includeConfig 'conf/modules_helixfold3.config' +} // Load links to DBs and parameters includeConfig 'conf/dbs.config' From 9a275705f417586012fedbee228077b6ca64297f Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 12:09:00 +1100 Subject: [PATCH 092/135] new file: conf/modules_helixfold3.config --- conf/modules_helixfold3.config | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 conf/modules_helixfold3.config diff --git a/conf/modules_helixfold3.config b/conf/modules_helixfold3.config new file mode 100644 index 00000000..1f1c3f81 --- /dev/null +++ b/conf/modules_helixfold3.config @@ -0,0 +1,22 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module 
options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + withName: 'NFCORE_PROTEINFOLD:HELIXFOLD3:MULTIQC' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : "helixfold3_$filename" } + ] + } + +} From c5cfeff96eda18718105c1bf82a21120f00b8ded Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 12:55:56 +1100 Subject: [PATCH 093/135] Modified katana.config to allow for direct execution of jobs on k095 from nextflow run --- conf/katana.config | 9 +++++---- nextflow.config | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 44e56e99..e2b7051b 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -8,6 +8,7 @@ params { } process { + executor = 'pbspro' queue = 'submission' resourceLimits = [ @@ -37,7 +38,7 @@ process { } withLabel:process_medium { cpus = { 6 * task.attempt } - memory = { 46.GB * task.attempt } + memory = { 100.GB * task.attempt } time = { 8.h * task.attempt } } withLabel:process_high { @@ -61,8 +62,8 @@ process { withLabel:gpu_compute { accelerator = 1 clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } - containerOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env CUDA_VISIBLE_DEVICES=0" + containerOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" } - apptainer.runOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env CUDA_VISIBLE_DEVICES=0" - singularity.runOptions = "--nv --bind /mnt,/data,/srv,${TMPDIR}:/tmp,${PWD} --env 
CUDA_VISIBLE_DEVICES=0" + apptainer.runOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" + singularity.runOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" } diff --git a/nextflow.config b/nextflow.config index 288b0607..515470cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,6 +85,21 @@ params { pdb100_path = null bfd_rosettafold_all_atom_path = null + // Helixfold3 parameters + helixfold3_db = null + + // Helixfold3 links + uniclust30_link = null + ccd_preprocessed_link = null + rfam_link = null + helixfold3_init_models_link = null + + // Helixfold3 paths + uniclust30_path = null + ccd_preprocessed_path = null + rfam_path = null + helixfold3_init_models_path = null + // Foldseek params foldseek_search = null foldseek_easysearch_arg = null @@ -259,10 +274,10 @@ profiles { } // Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +//includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/proteinfold custom profiles from different institutions. -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" +//includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled From 738fb0ad8501db124da628f14ee9f7b23b35e29c Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:07:52 +1100 Subject: [PATCH 094/135] Fixing left over merge lines and linting --- conf/katana.config | 6 +++--- modules/local/run_alphafold2.nf | 8 -------- modules/local/run_alphafold2_msa.nf | 8 -------- nextflow_schema.json | 3 +++ 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index e2b7051b..ab5f023a 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -8,7 +8,7 @@ params { } process { - executor = 'pbspro' + //executor = 'pbspro' queue = 'submission' resourceLimits = [ @@ -42,8 +42,8 @@ process { time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { 16 * task.attempt } - memory = { 250.GB * task.attempt } + cpus = { 8 * task.attempt } + memory = { 125.GB * task.attempt } time = { 16.h * task.attempt } } withLabel:process_long { diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2.nf index f863387d..63370ee7 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2.nf @@ -10,15 +10,7 @@ process RUN_ALPHAFOLD2 { error("Local RUN_ALPHAFOLD2 module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } -<<<<<<< HEAD -<<<<<<< HEAD - container "nf-core/proteinfold_alphafold2_standard:1.1.1" -======= container "nf-core/proteinfold_alphafold2_standard:dev" ->>>>>>> upstream/dev -======= - container "nf-core/proteinfold_alphafold2_standard:dev" ->>>>>>> 456b9e5c0196780239a51a50e3b2c6ed24f2492b input: tuple val(meta), path(fasta) diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa.nf index 94a339ad..5c4c747c 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa.nf @@ -10,15 +10,7 @@ process RUN_ALPHAFOLD2_MSA { error("Local RUN_ALPHAFOLD2_MSA module does not support Conda. Please use Docker / Singularity / Podman instead.") } -<<<<<<< HEAD -<<<<<<< HEAD - container "nf-core/proteinfold_alphafold2_msa:1.1.1" -======= container "nf-core/proteinfold_alphafold2_msa:dev" ->>>>>>> upstream/dev -======= - container "nf-core/proteinfold_alphafold2_msa:dev" ->>>>>>> 456b9e5c0196780239a51a50e3b2c6ed24f2492b input: tuple val(meta), path(fasta) diff --git a/nextflow_schema.json b/nextflow_schema.json index 807663f2..401c09c9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -720,6 +720,9 @@ "helixfold3_init_models_path": { "type": "string", "default": "$helixfold3_db" + }, + "helixfold3_db": { + "type": "string" } } } From 8943df19548a170a01d27d33df2fec3d964477f7 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:10:38 +1100 Subject: [PATCH 095/135] Linting modified: .github/workflows/linting_comment.yml --- .github/workflows/linting_comment.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..42e519bf 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + 
uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} From 77c2ad4f4613d3295038a6716c3fda64f0b4372e Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:13:49 +1100 Subject: [PATCH 096/135] Updated awsfulltest.yml --- .github/workflows/awsfulltest.yml | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 3774758d..ea0a032b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -1,16 +1,21 @@ name: nf-core AWS full size tests -# This workflow is triggered on published releases. +# This workflow is triggered on PRs opened against the master branch. # It can be additionally triggered manually with GitHub actions workflow dispatch button. 
# It runs the -profile 'test_full' on AWS batch on: - release: - types: [published] + pull_request: + branches: + - master workflow_dispatch: + pull_request_review: + types: [submitted] + jobs: run-platform: name: Run AWS full tests - if: github.repository == 'nf-core/proteinfold' + # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered + if: github.repository == 'nf-core/proteinfold' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest # Do a full-scale run on each of the mode strategy: @@ -27,6 +32,18 @@ jobs: "esmfold_multimer", ] steps: + - uses: octokit/request-action@v2.x + id: check_approvals + with: + route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - id: test_variables + if: github.event_name != 'workflow_dispatch' + run: | + JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' + CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') + test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 with: From 8ebc5312e44cf45c29c0ea7b7fc64897e41eaaf2 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:20:35 +1100 Subject: [PATCH 097/135] Passes linting --- nextflow_schema.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 401c09c9..3f638e62 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -703,23 +703,22 @@ }, "uniclust30_path": { "type": "string", - "default": "{params.helixfold3_db}/uniclust30/*" + "default": "null/uniclust30/*" }, "ccd_preprocessed_path": { "type": "string", - "default": "{params.helixfold3_db}/*" + "default": 
"null/ccd_preprocessed_etkdg.pkl.gz" }, "rfam_path": { "type": "string", - "default": "{params.helixfold3_db}/*" + "default": "null/Rfam-14.9_rep_seq.fasta" }, "helixfold3_init_models_link": { "type": "string", "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" }, "helixfold3_init_models_path": { - "type": "string", - "default": "$helixfold3_db" + "type": "string" }, "helixfold3_db": { "type": "string" From c4e8c9c6d795b8abfe8e92c37c7b53214573a3eb Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:21:46 +1100 Subject: [PATCH 098/135] modified: conf/katana.config --- conf/katana.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/katana.config b/conf/katana.config index ab5f023a..8a820efc 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -8,7 +8,7 @@ params { } process { - //executor = 'pbspro' + executor = 'pbspro' queue = 'submission' resourceLimits = [ From ef28950c0affc58da388f397f1ab258a08669ea5 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 14:57:30 +1100 Subject: [PATCH 099/135] Fixing file emit for hf3 --- modules/local/run_helixfold3.nf | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 24d562bd..c1f20e41 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -18,9 +18,11 @@ process RUN_HELIXFOLD3 { output: path ("${fasta.baseName}*") + tuple val(meta), path ("${meta.id}_helixfold3.cif"), emit: main_cif tuple val(meta), path ("${meta.id}_helixfold3.pdb"), emit: main_pdb - tuple val(meta), path ("*pdb"), emit: pdb - tuple val(meta), path ("*_mqc.tsv"), emit: multiqc + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb + tuple val(meta), path ("${fasta.baseName}/*_msa.tsv") , emit: msa + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc path "versions.yml", emit: versions when: @@ -64,6 
+66,21 @@ process RUN_HELIXFOLD3 { --diff_batch_size 1 \ --precision "bf16" + cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.pdb ./"${meta.id}"_helixfold3.pdb + cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.cif ./"${meta.id}"_helixfold3.cif + cd "${fasta.baseName}" + awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank1/predicted_structure.pdb | uniq > ranked_1_plddt.tsv + for i in 2 3 4 + do awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank\$i/predicted_structure.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv + done + paste ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv + echo -e Positions"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv + cat header.tsv plddt.tsv > ../"${meta.id}"_plddt_mqc.tsv + cp final_features.pkl ../ + for i in 2 3 4 + do cp "${fasta.baseName}"-rank\$i/predicted_structure.pdb" ../ranked_\$i.pdb + done + cd .. cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -73,8 +90,14 @@ process RUN_HELIXFOLD3 { stub: """ - touch ./"${fasta.baseName}".helixfold3.pdb - touch ./"${fasta.baseName}"_mqc.tsv + touch ./"${meta.id}"_helixfold3.pdb + touch ./"${meta.id}"_mqc.tsv + mkdir "${fasta.baseName}" + touch "${fasta.baseName}/ranked_1.pdb" + touch "${fasta.baseName}/ranked_2.pdb" + touch "${fasta.baseName}/ranked_3.pdb" + touch "${fasta.baseName}/ranked_4.pdb" + touch "${fasta.baseName}/${fasta.baseName}_msa.tsv" cat <<-END_VERSIONS > versions.yml "${task.process}": From bbe81b98a5d3d27ee1c1b2c719f44523f8352067 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 12 Nov 2024 15:49:06 +1100 Subject: [PATCH 100/135] Emits files including cif properly now --- conf/katana.config | 8 +++----- modules/local/run_helixfold3.nf | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 8a820efc..41c31321 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -60,10 +60,8 @@ process { maxRetries = 2 } 
withLabel:gpu_compute { - accelerator = 1 - clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } - containerOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" + accelerator = 1 + clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } + containerOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" } - apptainer.runOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" - singularity.runOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" } diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index c1f20e41..4445ab50 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -3,7 +3,6 @@ */ process RUN_HELIXFOLD3 { tag "$meta.id" - label 'process_medium' label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba @@ -90,6 +89,7 @@ process RUN_HELIXFOLD3 { stub: """ + touch ./"${meta.id}"_helixfold3.cif touch ./"${meta.id}"_helixfold3.pdb touch ./"${meta.id}"_mqc.tsv mkdir "${fasta.baseName}" From 9afa0588da294db1a4c52262825ef703f339a47d Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 13 Nov 2024 15:09:33 +1100 Subject: [PATCH 101/135] HF3 and RFAA now working with Katana OnDemand --- conf/katana.config | 2 +- modules/local/run_helixfold3.nf | 24 +++++++++++------------ modules/local/run_rosettafold_all_atom.nf | 23 +++++++++++----------- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index 41c31321..e0f6830c 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -8,7 +8,7 @@ params { } process { - executor = 'pbspro' +// executor = 'pbspro' queue = 'submission' resourceLimits = [ diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 4445ab50..4d49c9f8 100644 --- a/modules/local/run_helixfold3.nf +++ 
b/modules/local/run_helixfold3.nf @@ -17,11 +17,10 @@ process RUN_HELIXFOLD3 { output: path ("${fasta.baseName}*") - tuple val(meta), path ("${meta.id}_helixfold3.cif"), emit: main_cif - tuple val(meta), path ("${meta.id}_helixfold3.pdb"), emit: main_pdb - tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb - tuple val(meta), path ("${fasta.baseName}/*_msa.tsv") , emit: msa - tuple val(meta), path ("*_mqc.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif + tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: main_pdb + tuple val(meta), path ("ranked*pdb") , emit: pdb + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc path "versions.yml", emit: versions when: @@ -61,8 +60,9 @@ process RUN_HELIXFOLD3 { --output_dir="\$PWD" \ --model_name allatom_demo \ --init_model "${params.helixfold3_init_models_path}/HelixFold3-240814.pdparams" \ - --infer_times 1 \ + --infer_times 4 \ --diff_batch_size 1 \ + --logging_level "ERROR" \ --precision "bf16" cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.pdb ./"${meta.id}"_helixfold3.pdb @@ -77,7 +77,7 @@ process RUN_HELIXFOLD3 { cat header.tsv plddt.tsv > ../"${meta.id}"_plddt_mqc.tsv cp final_features.pkl ../ for i in 2 3 4 - do cp "${fasta.baseName}"-rank\$i/predicted_structure.pdb" ../ranked_\$i.pdb + do cp ""${fasta.baseName}"-rank\$i/predicted_structure.pdb" ../ranked_\$i.pdb done cd .. 
@@ -92,12 +92,10 @@ process RUN_HELIXFOLD3 { touch ./"${meta.id}"_helixfold3.cif touch ./"${meta.id}"_helixfold3.pdb touch ./"${meta.id}"_mqc.tsv - mkdir "${fasta.baseName}" - touch "${fasta.baseName}/ranked_1.pdb" - touch "${fasta.baseName}/ranked_2.pdb" - touch "${fasta.baseName}/ranked_3.pdb" - touch "${fasta.baseName}/ranked_4.pdb" - touch "${fasta.baseName}/${fasta.baseName}_msa.tsv" + touch "ranked_1.pdb" + touch "ranked_2.pdb" + touch "ranked_3.pdb" + touch "ranked_4.pdb" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index c45d2351..5859e047 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -3,7 +3,6 @@ */ process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" - label 'process_medium' label 'gpu_compute' // Exit if running this module with -profile conda / -profile mamba @@ -21,8 +20,9 @@ process RUN_ROSETTAFOLD_ALL_ATOM { output: path ("${fasta.baseName}*") - tuple val(meta), path ("*pdb"), emit: pdb - tuple val(meta), path ("*_mqc.tsv"), emit: multiqc + tuple val(meta), path ("${meta.id}_rosettafold_all_atom.pdb") , emit: main_pdb + tuple val(meta), path ("*pdb") , emit: pdb + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc path "versions.yml", emit: versions when: @@ -33,14 +33,15 @@ process RUN_ROSETTAFOLD_ALL_ATOM { ln -s /app/RoseTTAFold-All-Atom/* . 
mamba run --name RFAA python -m rf2aa.run_inference \ - --config-dir $PWD \ - --config-path $PWD \ + loader_params.MAXCYCLE=1 \ + checkpoint_path="/srv/scratch/sbf/rfaa/RFAA_paper_weights.pt" \ + --config-dir /app/RoseTTAFold-All-Atom/rf2aa/config/inference \ --config-name "${fasta}" - cp "${fasta.baseName}".pdb ./"${fasta.baseName}".rosettafold_all_atom.pdb - awk '{print \$6"\\t"\$11}' "${fasta.baseName}".rosettafold_all_atom.pdb | uniq > plddt.tsv - echo -e Positions"\\t" > header.tsv - cat header.tsv plddt.tsv > "${fasta.baseName}"_plddt_mqc.tsv + cp "${fasta.baseName}".pdb ./"${meta.id}"_rosettafold_all_atom.pdb + awk '{print \$6"\\t"\$11}' "${meta.id}"_rosettafold_all_atom.pdb | uniq > plddt.tsv + echo -e Positions"\\t""${meta.id}"_rosettafold_all_atom.pdb > header.tsv + cat header.tsv plddt.tsv > "${meta.id}"_plddt_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -50,8 +51,8 @@ process RUN_ROSETTAFOLD_ALL_ATOM { stub: """ - touch ./"${fasta.baseName}".rosettafold_all_atom.pdb - touch ./"${fasta.baseName}"_mqc.tsv + touch ./"${meta.id}"_rosettafold_all_atom.pdb + touch ./"${meta.id}"_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": From b75cdfcced0690774456b759d4afc3584931ec8c Mon Sep 17 00:00:00 2001 From: jscgh Date: Wed, 13 Nov 2024 17:16:44 +1100 Subject: [PATCH 102/135] New branch for aligning the new modules (RFAA & HF3) with the nf-core repo --- conf/katana.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/katana.config b/conf/katana.config index e0f6830c..bf463c6a 100644 --- a/conf/katana.config +++ b/conf/katana.config @@ -2,7 +2,7 @@ params { - config_profile_description = 'UNSW Katana HPC profile provided by nf-core/configs' + config_profile_description = 'UNSW Katana HPC profile for nf-core/proteinfold' config_profile_contact = '@jscgh' config_profile_url = 'https://docs.restech.unsw.edu.au/' } @@ -42,7 +42,7 @@ process { time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { 
8 * task.attempt } + cpus = { 8 * task.attempt } memory = { 125.GB * task.attempt } time = { 16.h * task.attempt } } From ac9daaac7c0b4a6b3928504261129b4feefc11c0 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 14 Nov 2024 16:22:43 +1100 Subject: [PATCH 103/135] Updated for running with configs --- .gitignore | 1 + conf/katana.config | 67 ----------------------- modules/local/run_helixfold3.nf | 1 + modules/local/run_rosettafold_all_atom.nf | 1 + nextflow.config | 1 + 5 files changed, 4 insertions(+), 67 deletions(-) delete mode 100644 conf/katana.config diff --git a/.gitignore b/.gitignore index a42ce016..92fd8dfb 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +conf/.* diff --git a/conf/katana.config b/conf/katana.config deleted file mode 100644 index bf463c6a..00000000 --- a/conf/katana.config +++ /dev/null @@ -1,67 +0,0 @@ -// UNSW Katana nf-core configuration profile - - -params { - config_profile_description = 'UNSW Katana HPC profile for nf-core/proteinfold' - config_profile_contact = '@jscgh' - config_profile_url = 'https://docs.restech.unsw.edu.au/' -} - -process { -// executor = 'pbspro' - queue = 'submission' - - resourceLimits = [ - memory: 500.GB, - cpus: 32, - time: 200.h - ] - - // TODO nf-core: Check the defaults for all processes - cpus = { 1 * task.attempt } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } - - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - withLabel:process_single { - cpus = { 1 } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } - } - withLabel:process_low { - cpus = { 2 * task.attempt } - memory = { 12.GB * task.attempt } - time = { 4.h * task.attempt } - } - withLabel:process_medium { - cpus = { 6 * task.attempt } - memory = { 100.GB * task.attempt } - time = { 8.h * task.attempt } - } - withLabel:process_high { - cpus = { 8 * task.attempt } - memory = { 125.GB * task.attempt } - time = { 16.h * task.attempt } - } - withLabel:process_long { - time = { 24.h * task.attempt } - } - withLabel:process_high_memory { - memory = { 250.GB * task.attempt } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 2 - } - withLabel:gpu_compute { - accelerator = 1 - clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=8 -l mem=125gb" } - containerOptions = "--nv --bind /mnt,/data,/srv,\${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" - } -} diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 4d49c9f8..4ffbe3b2 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -4,6 +4,7 @@ process RUN_HELIXFOLD3 { tag "$meta.id" label 'gpu_compute' + label 'process_medium' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 5859e047..7f4fda0a 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -4,6 +4,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { tag "$meta.id" label 'gpu_compute' + label 'process_medium' // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git 
a/nextflow.config b/nextflow.config index 515470cd..6c53e205 100644 --- a/nextflow.config +++ b/nextflow.config @@ -235,6 +235,7 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true + if (params.use_gpu) { apptainer.runOptions = '--nv' } conda.enabled = false docker.enabled = false singularity.enabled = false From 06250537b703147119601279cb0803bd08201aaf Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 15 Nov 2024 16:45:44 +1100 Subject: [PATCH 104/135] Overhauled helixfold3 db paths to match nf-core methods --- .gitignore | 2 +- conf/dbs.config | 25 +++++--- main.nf | 42 ++++++++++--- modules/local/run_helixfold3.nf | 37 ++++++++---- modules/local/run_rosettafold_all_atom.nf | 2 +- nextflow.config | 32 +++++++--- subworkflows/local/prepare_helixfold3_dbs.nf | 62 +++++++++++++++----- workflows/helixfold3.nf | 28 +++++++-- 8 files changed, 170 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index 92fd8dfb..8070b718 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ testing/ testing* *.pyc null/ -conf/.* +.nfs* diff --git a/conf/dbs.config b/conf/dbs.config index fbbcfd13..aaf3015d 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -32,7 +32,7 @@ params { uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - uniprot_path = "${params.alphafold2_db}/uniprot/*" + helixfold3_uniprot_path = "${params.alphafold2_db}/uniprot/*" // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' @@ -58,16 +58,23 @@ params { bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" // Helixfold3 links - uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' - ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' - rfam_link = 
'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' - helixfold3_init_models_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip' + helixfold3_uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' + helixfold3_ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' + helixfold3_rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' + helixfold3_init_models_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip' // Helixfold3 paths - uniclust30_path = "${params.helixfold3_db}/uniclust30/*" - ccd_preprocessed_path = "${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" - rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" - helixfold3_init_models_path = "${params.helixfold3_db}" + helixfold3_uniclust30_path = "${params.helixfold3_db}/uniclust30/*" + helixfold3_ccd_preprocessed_path = "${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" + helixfold3_rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" + helixfold3_init_models_path = "${params.helixfold3_db}/HelixFold3-240814.pdparams" + helixfold3_bfd_path = "${params.helixfold3_db}/bfd/*" + helixfold3_small_bfd_path = "${params.helixfold3_db}/small_bfd/*" + helixfold3_uniprot_path = "${params.helixfold3_db}/uniprot/*" + helixfold3_pdb_seqres_path = "${params.helixfold3_db}/pdb_seqres/*" + helixfold3_uniref90_path = "${params.helixfold3_db}/uniref90/*" + helixfold3_mgnify_path = "${params.helixfold3_db}/mgnify/*" + helixfold3_pdb_mmcif_path = "${params.helixfold3_db}/pdb_mmcif/*" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/main.nf b/main.nf index b2fe5ac5..259232e2 100644 --- a/main.nf +++ b/main.nf @@ -265,12 +265,28 @@ workflow NFCORE_PROTEINFOLD { // SUBWORKFLOW: Prepare helixfold3 DBs 
// PREPARE_HELIXFOLD3_DBS ( - params.uniclust30_path, - params.ccd_preprocessed_path, - params.rfam_path, - params.uniclust30_path, - params.ccd_preprocessed_path, - params.rfam_path + params.helixfold3_uniclust30_link, + params.helixfold3_ccd_preprocessed_link, + params.helixfold3_rfam_link, + params.helixfold3_init_models_link, + params.helixfold3_bfd_link, + params.helixfold3_small_bfd_link, + params.helixfold3_uniprot_link, + params.helixfold3_pdb_seqres_link, + params.helixfold3_uniref90_link, + params.helixfold3_mgnify_link, + params.helixfold3_pdb_mmcif_link, + params.helixfold3_uniclust30_path, + params.helixfold3_ccd_preprocessed_path, + params.helixfold3_rfam_path, + params.helixfold3_init_models_path, + params.helixfold3_bfd_path, + params.helixfold3_small_bfd_path, + params.helixfold3_uniprot_path, + params.helixfold3_pdb_seqres_path, + params.helixfold3_uniref90_path, + params.helixfold3_mgnify_path, + params.helixfold3_pdb_mmcif_path ) ch_versions = ch_versions.mix(PREPARE_HELIXFOLD3_DBS.out.versions) @@ -280,9 +296,17 @@ workflow NFCORE_PROTEINFOLD { HELIXFOLD3 ( ch_samplesheet, ch_versions, - PREPARE_HELIXFOLD3_DBS.out.uniclust30, - PREPARE_HELIXFOLD3_DBS.out.ccd_preprocessed, - PREPARE_HELIXFOLD3_DBS.out.rfam + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniclust30, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_ccd_preprocessed, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_rfam, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_bfd, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_small_bfd, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniprot, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_pdb_seqres, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniref90, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_mgnify, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_pdb_mmcif, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models ) ch_multiqc = HELIXFOLD3.out.multiqc_report ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 
4ffbe3b2..35d953f3 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -11,10 +11,21 @@ process RUN_HELIXFOLD3 { error("Local RUN_HELIXFOLD3 module does not support Conda. Please use Docker / Singularity / Podman / Apptainer instead.") } - container "/srv/scratch/sbf/apptainers/helixfold3.sif" + container "/srv/scratch/sbf-pipelines/proteinfold/singularity/helixfold3.sif" input: tuple val(meta), path(fasta) + path ('uniclust30/*') + path ('*') + path ('*') + path ('bfd/*') + path ('small_bfd/*') + path ('uniprot/*') + path ('pdb_seqres/*') + path ('uniref90/*') + path ('mgnify/*') + path ('pdb_mmcif/*') + path ('init_models/*') output: path ("${fasta.baseName}*") @@ -45,22 +56,22 @@ process RUN_HELIXFOLD3 { --hmmsearch_binary_path "/opt/miniforge/envs/helixfold/bin/hmmsearch" \ --hmmbuild_binary_path "/opt/miniforge/envs/helixfold/bin/hmmbuild" \ --preset='reduced_dbs' \ - --bfd_database_path="${params.alphafold2_db}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ - --small_bfd_database_path="${params.helixfold3_db}/bfd-first_non_consensus_sequences.fasta" \ - --uniclust30_database_path="${params.helixfold3_db}/uniclust30/uniclust30_2018_08" \ - --uniprot_database_path="${params.alphafold2_db}/uniprot/uniprot.fasta" \ - --pdb_seqres_database_path="${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt" \ - --rfam_database_path="${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" \ - --template_mmcif_dir="${params.alphafold2_db}/pdb_mmcif/mmcif_files" \ - --obsolete_pdbs_path="${params.alphafold2_db}/pdb_mmcif/obsolete.dat" \ - --ccd_preprocessed_path="${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" \ - --uniref90_database_path "${params.helixfold3_db}/uniref90/uniref90.fasta" \ - --mgnify_database_path "${params.helixfold3_db}/mgnify/mgy_clusters_2018_12.fa" \ + --bfd_database_path="./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt" \ + 
--small_bfd_database_path="./small_bfd/bfd-first_non_consensus_sequences.fasta" \ + --uniclust30_database_path="./uniclust30/uniclust30_2018_08" \ + --uniprot_database_path="./uniprot/uniprot.fasta" \ + --pdb_seqres_database_path="./pdb_seqres/pdb_seqres.txt" \ + --rfam_database_path="./Rfam-14.9_rep_seq.fasta" \ + --template_mmcif_dir="./pdb_mmcif/mmcif_files" \ + --obsolete_pdbs_path="./pdb_mmcif/obsolete.dat" \ + --ccd_preprocessed_path="./ccd_preprocessed_etkdg.pkl.gz" \ + --uniref90_database_path "./uniref90/uniref90.fasta" \ + --mgnify_database_path "./mgnify/mgy_clusters_2018_12.fa" \ --max_template_date=2024-08-14 \ --input_json="${fasta}" \ --output_dir="\$PWD" \ --model_name allatom_demo \ - --init_model "${params.helixfold3_init_models_path}/HelixFold3-240814.pdparams" \ + --init_model "./init_models/HelixFold3-240814.pdparams" \ --infer_times 4 \ --diff_batch_size 1 \ --logging_level "ERROR" \ diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 7f4fda0a..908d667a 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -11,7 +11,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. 
Please use Docker / Singularity / Podman instead.") } - container "/srv/scratch/sbf/apptainers/RoseTTAFold_All_Atom.sif" + container "/srv/scratch/sbf-pipelines/proteinfold/singularity/RoseTTAFold_All_Atom.sif" input: tuple val(meta), path(fasta) diff --git a/nextflow.config b/nextflow.config index 6c53e205..331667c9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,16 +89,30 @@ params { helixfold3_db = null // Helixfold3 links - uniclust30_link = null - ccd_preprocessed_link = null - rfam_link = null - helixfold3_init_models_link = null + helixfold3_uniclust30_link = null + helixfold3_ccd_preprocessed_link = null + helixfold3_rfam_link = null + helixfold3_init_models_link = null + helixfold3_bfd_link = null + helixfold3_small_bfd_link = null + helixfold3_uniprot_link = null + helixfold3_pdb_seqres_link = null + helixfold3_uniref90_link = null + helixfold3_mgnify_link = null + helixfold3_pdb_mmcif_link = null // Helixfold3 paths - uniclust30_path = null - ccd_preprocessed_path = null - rfam_path = null - helixfold3_init_models_path = null + helixfold3_uniclust30_path = null + helixfold3_ccd_preprocessed_path = null + helixfold3_rfam_path = null + helixfold3_init_models_path = null + helixfold3_bfd_path = null + helixfold3_small_bfd_path = null + helixfold3_uniprot_path = null + helixfold3_pdb_seqres_path = null + helixfold3_uniref90_path = null + helixfold3_mgnify_path = null + helixfold3_pdb_mmcif_path = null // Foldseek params foldseek_search = null @@ -255,7 +269,7 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - katana { includeConfig 'conf/katana.config' } + unsw_katana { includeConfig 'conf/unsw_katana.config' } test { includeConfig 'conf/test.config' } test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } diff --git a/subworkflows/local/prepare_helixfold3_dbs.nf b/subworkflows/local/prepare_helixfold3_dbs.nf index b96771cc..2c3076b5 100644 
--- a/subworkflows/local/prepare_helixfold3_dbs.nf +++ b/subworkflows/local/prepare_helixfold3_dbs.nf @@ -1,22 +1,56 @@ workflow PREPARE_HELIXFOLD3_DBS { take: - uniclust30_path - ccd_preprocessed_path - rfam_path - uniclust30_link - ccd_preprocessed_link - rfam_link + helixfold3_uniclust30_link + helixfold3_ccd_preprocessed_link + helixfold3_rfam_link + helixfold3_init_models_link + helixfold3_bfd_link + helixfold3_small_bfd_link + helixfold3_uniprot_link + helixfold3_pdb_seqres_link + helixfold3_uniref90_link + helixfold3_mgnify_link + helixfold3_pdb_mmcif_link + helixfold3_uniclust30_path + helixfold3_ccd_preprocessed_path + helixfold3_rfam_path + helixfold3_init_models_path + helixfold3_bfd_path + helixfold3_small_bfd_path + helixfold3_uniprot_path + helixfold3_pdb_seqres_path + helixfold3_uniref90_path + helixfold3_mgnify_path + helixfold3_pdb_mmcif_path - main: - ch_uniclust30 = Channel.value(file(uniclust30_path)) - ch_ccd_preprocessed = Channel.value(file(ccd_preprocessed_path)) - ch_rfam = Channel.value(file(rfam_path)) + main: + ch_helixfold3_uniclust30 = Channel.value(file(helixfold3_uniclust30_path)) + ch_helixfold3_ccd_preprocessed = Channel.value(file(helixfold3_ccd_preprocessed_path)) + ch_helixfold3_rfam = Channel.value(file(helixfold3_rfam_path)) + ch_helixfold3_bfd = Channel.value(file(helixfold3_bfd_path)) + ch_helixfold3_small_bfd = Channel.value(file(helixfold3_small_bfd_path)) + ch_helixfold3_uniprot = Channel.value(file(helixfold3_uniprot_path)) + ch_helixfold3_pdb_seqres = Channel.value(file(helixfold3_pdb_seqres_path)) + ch_helixfold3_uniref90 = Channel.value(file(helixfold3_uniref90_path)) + ch_helixfold3_mgnify = Channel.value(file(helixfold3_mgnify_path)) + ch_mmcif_files = file(helixfold3_pdb_mmcif_path, type: 'dir') + ch_mmcif_obsolete = file(helixfold3_pdb_mmcif_path, type: 'file') + ch_helixfold3_pdb_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_helixfold3_init_models = 
Channel.value(file(helixfold3_init_models_path)) ch_versions = Channel.empty() emit: - uniclust30 = ch_uniclust30 - ccd_preprocessed = ch_ccd_preprocessed - rfam = ch_rfam - versions = ch_versions + helixfold3_uniclust30 = ch_helixfold3_uniclust30 + helixfold3_ccd_preprocessed = ch_helixfold3_ccd_preprocessed + helixfold3_rfam = ch_helixfold3_rfam + helixfold3_bfd = ch_helixfold3_bfd + helixfold3_small_bfd = ch_helixfold3_small_bfd + helixfold3_uniprot = ch_helixfold3_uniprot + helixfold3_pdb_seqres = ch_helixfold3_pdb_seqres + helixfold3_uniref90 = ch_helixfold3_uniref90 + helixfold3_mgnify = ch_helixfold3_mgnify + helixfold3_pdb_mmcif = ch_helixfold3_pdb_mmcif + helixfold3_init_models = ch_helixfold3_init_models + versions = ch_versions } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index a05cdc9f..1d29ecea 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -39,9 +39,18 @@ workflow HELIXFOLD3 { take: ch_samplesheet ch_versions // channel: [ path(versions.yml) ] - ch_uniclust30 - ch_ccd_preprocessed - ch_rfam + ch_helixfold3_uniclust30 + ch_helixfold3_ccd_preprocessed + ch_helixfold3_rfam + ch_helixfold3_bfd + ch_helixfold3_small_bfd + ch_helixfold3_uniprot + ch_helixfold3_pdb_seqres + ch_helixfold3_uniref90 + ch_helixfold3_mgnify + ch_helixfold3_pdb_mmcif + ch_helixfold3_init_models + main: ch_multiqc_files = Channel.empty() @@ -50,7 +59,18 @@ workflow HELIXFOLD3 { // SUBWORKFLOW: Run helixfold3 // RUN_HELIXFOLD3 ( - ch_samplesheet + ch_samplesheet, + ch_helixfold3_uniclust30, + ch_helixfold3_ccd_preprocessed, + ch_helixfold3_rfam, + ch_helixfold3_bfd, + ch_helixfold3_small_bfd, + ch_helixfold3_uniprot, + ch_helixfold3_pdb_seqres, + ch_helixfold3_uniref90, + ch_helixfold3_mgnify, + ch_helixfold3_pdb_mmcif, + ch_helixfold3_init_models ) ch_multiqc_rep = RUN_HELIXFOLD3.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) From d3e62eb516cc5de4c01defc96de2dde1fd5778ca Mon Sep 17 00:00:00 2001 From: 
jscgh Date: Mon, 18 Nov 2024 17:10:44 +1100 Subject: [PATCH 105/135] Katana config --- conf/unsw_katana.config | 70 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 conf/unsw_katana.config diff --git a/conf/unsw_katana.config b/conf/unsw_katana.config new file mode 100644 index 00000000..825610ea --- /dev/null +++ b/conf/unsw_katana.config @@ -0,0 +1,70 @@ +// UNSW Katana nf-core configuration profile + + +params { + config_profile_description = 'UNSW Katana HPC profile for nf-core/proteinfold' + config_profile_contact = '@jscgh' + config_profile_url = 'https://docs.restech.unsw.edu.au/' +} + +process { +// executor = 'pbspro' + queue = 'submission' + + resourceLimits = [ + memory: 500.GB, + cpus: 32, + time: 200.h + ] + + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 100.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 8 * task.attempt } + memory = { 125.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 24.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 250.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 1 + } + withLabel:gpu_compute { + accelerator = 1 + clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=${task.cpus} -l mem=${task.memory.toMega()}mb" } + } + + withName: 
'RUN_HELIXFOLD3' { + containerOptions = "--bind \${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" + } +} From 94f47dd945fc3879650b7b4770242381dd2a34ee Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 19 Nov 2024 10:44:07 +1100 Subject: [PATCH 106/135] Merged with origin/align-modules-to-nf-core for new HF3 path variables --- conf/dbs.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/dbs.config b/conf/dbs.config index aaf3015d..09f1c510 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -32,7 +32,7 @@ params { uniref30_alphafold2_path = "${params.alphafold2_db}/uniref30/*" uniref90_path = "${params.alphafold2_db}/uniref90/*" pdb_seqres_path = "${params.alphafold2_db}/pdb_seqres/*" - helixfold3_uniprot_path = "${params.alphafold2_db}/uniprot/*" + uniprot_path = "${params.alphafold2_db}/uniprot/*" // Colabfold links colabfold_db_link = 'http://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz' From c6d14a2da6fc7175006a864ac20150cd5af6de71 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 19 Nov 2024 10:56:41 +1100 Subject: [PATCH 107/135] Updated schema with nf-core pipelines schema build --- nextflow_schema.json | 73 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 3f638e62..a9935637 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -689,39 +689,88 @@ "type": "string", "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" }, - "uniclust30_link": { + "helixfold3_init_models_link": { + "type": "string", + "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" + }, + "helixfold3_init_models_path": { + "type": "string" + }, + "helixfold3_db": { + "type": "string" + }, + "helixfold3_uniclust30_link": { "type": "string", "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz" }, - 
"ccd_preprocessed_link": { + "helixfold3_ccd_preprocessed_link": { "type": "string", "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz" }, - "rfam_link": { + "helixfold3_rfam_link": { "type": "string", "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta" }, - "uniclust30_path": { + "helixfold3_bfd_link": { + "type": "string" + }, + "helixfold3_small_bfd_link": { + "type": "string" + }, + "helixfold3_uniprot_link": { + "type": "string" + }, + "helixfold3_pdb_seqres_link": { + "type": "string" + }, + "helixfold3_uniref90_link": { + "type": "string" + }, + "helixfold3_mgnify_link": { + "type": "string" + }, + "helixfold3_pdb_mmcif_link": { + "type": "string" + }, + "helixfold3_uniclust30_path": { "type": "string", "default": "null/uniclust30/*" }, - "ccd_preprocessed_path": { + "helixfold3_ccd_preprocessed_path": { "type": "string", "default": "null/ccd_preprocessed_etkdg.pkl.gz" }, - "rfam_path": { + "helixfold3_rfam_path": { "type": "string", "default": "null/Rfam-14.9_rep_seq.fasta" }, - "helixfold3_init_models_link": { + "helixfold3_bfd_path": { "type": "string", - "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" + "default": "null/bfd/*" }, - "helixfold3_init_models_path": { - "type": "string" + "helixfold3_small_bfd_path": { + "type": "string", + "default": "null/small_bfd/*" }, - "helixfold3_db": { - "type": "string" + "helixfold3_uniprot_path": { + "type": "string", + "default": "null/uniprot/*" + }, + "helixfold3_pdb_seqres_path": { + "type": "string", + "default": "null/pdb_seqres/*" + }, + "helixfold3_uniref90_path": { + "type": "string", + "default": "null/uniref90/*" + }, + "helixfold3_mgnify_path": { + "type": "string", + "default": "null/mgnify/*" + }, + "helixfold3_pdb_mmcif_path": { + "type": "string", + "default": "null/pdb_mmcif/*" } } } From 5fa189a0d45c2b0d5d43c716e154c204c44acf9b Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 19 
Nov 2024 11:29:09 +1100 Subject: [PATCH 108/135] Updated schema with nf-core pipelines schema build --- nextflow_schema.json | 50 +++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index a9935637..b53ba4d5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -189,7 +189,8 @@ "type": "string", "description": "Specifies whether is a 'monomer' or 'multimer' prediction", "enum": ["monomer", "multimer"], - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "monomer" } } }, @@ -380,52 +381,62 @@ "bfd_path": { "type": "string", "description": "Path to BFD dababase", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/bfd/*" }, "small_bfd_path": { "type": "string", "description": "Path to a reduced version of the BFD database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/small_bfd/*" }, "alphafold2_params_path": { "type": "string", "description": "Path to the Alphafold2 parameters", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/alphafold_params_*/*" }, "mgnify_path": { "type": "string", "description": "Path to the MGnify database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/mgnify/*" }, "pdb70_path": { "type": "string", "description": "Path to the PDB70 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb70/**" }, "pdb_mmcif_path": { "type": "string", "description": "Path to the PDB mmCIF database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb_mmcif/*" }, "uniref30_alphafold2_path": { "type": "string", "description": "Path to the Uniref30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30/*" }, "uniref90_path": { "type": 
"string", "description": "Path to the UniRef90 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref90/*" }, "pdb_seqres_path": { "type": "string", "description": "Path to the PDB SEQRES database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/pdb_seqres/*" }, "uniprot_path": { "type": "string", "description": "Path to UniProt database containing the SwissProt and the TrEMBL databases", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniprot/*" } } }, @@ -463,12 +474,14 @@ "colabfold_db_path": { "type": "string", "description": "Link to the Colabfold database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/colabfold_envdb_202108" }, "uniref30_colabfold_path": { "type": "string", "description": "Link to the UniRef30 database", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/uniref30_2302" }, "colabfold_alphafold2_params_path": { "type": "string", @@ -476,9 +489,10 @@ "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_tags": { - "type": "object", + "type": "string", "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream" + "fa_icon": "fas fa-stream", + "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" } } }, @@ -517,7 +531,8 @@ "esmfold_params_path": { "type": "string", "description": "Link to the Esmfold parameters", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "null/*" } } }, @@ -694,7 +709,8 @@ "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" }, "helixfold3_init_models_path": { - "type": "string" + "type": "string", + "default": 
"null/HelixFold3-240814.pdparams" }, "helixfold3_db": { "type": "string" From 2ede145409d014b6693fcc84664b72711c88a87a Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 19 Nov 2024 11:53:18 +1100 Subject: [PATCH 109/135] nf-core pipelines lint passed --- nextflow_schema.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index b53ba4d5..e12a1eda 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -75,7 +75,6 @@ }, "full_dbs": { "type": "boolean", - "default": false, "description": "If true uses the full version of the BFD database otherwise, otherwise it uses its reduced version, small bfd", "fa_icon": "fas fa-battery-full" }, @@ -491,8 +490,7 @@ "colabfold_alphafold2_params_tags": { "type": "string", "description": "Dictionary with Alphafold2 parameters tags", - "fa_icon": "fas fa-stream", - "default": "[alphafold2_multimer_v1:'alphafold_params_colab_2021-10-27', alphafold2_multimer_v2:'alphafold_params_colab_2022-03-02', alphafold2_multimer_v3:'alphafold_params_colab_2022-12-06', alphafold2_ptm:'alphafold_params_2021-07-14']" + "fa_icon": "fas fa-stream" } } }, From 3c6a8cbc2e91d944e07c2202296bb93bf3e849ae Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 12:02:40 +1100 Subject: [PATCH 110/135] Added Helixfold3 module --- main.nf | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/main.nf b/main.nf index d1ec1a6b..db183cfc 100644 --- a/main.nf +++ b/main.nf @@ -27,6 +27,14 @@ if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' } +if (params.mode == "rosettafold_all_atom") { + include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' + include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' +} +if (params.mode == 
"helixfold3") { + include { PREPARE_HELIXFOLD3_DBS } from './subworkflows/local/prepare_helixfold3_dbs' + include { HELIXFOLD3 } from './workflows/helixfold3' +} include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_proteinfold_pipeline' @@ -205,6 +213,91 @@ workflow NFCORE_PROTEINFOLD { ch_report_input = ch_report_input.mix(ESMFOLD.out.pdb_msa) } + // + // WORKFLOW: Run rosettafold_all_atom + // + if(params.mode == "rosettafold_all_atom") { + // + // SUBWORKFLOW: Prepare Rosettafold-all-atom DBs + // + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( + params.bfd_rosettafold_all_atom_path, + params.uniref30_rosettafold_all_atom_path, + params.pdb100_path + ) + ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/rosettafold_all_atom workflow + // + ROSETTAFOLD_ALL_ATOM ( + ch_samplesheet, + ch_versions, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, + PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 + ) + ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report + ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) + } + + + // + // WORKFLOW: Run helixfold3 + // + if(params.mode == "helixfold3") { + // + // SUBWORKFLOW: Prepare helixfold3 DBs + // + PREPARE_HELIXFOLD3_DBS ( + params.helixfold3_uniclust30_link, + params.helixfold3_ccd_preprocessed_link, + params.helixfold3_rfam_link, + params.helixfold3_init_models_link, + params.helixfold3_bfd_link, + params.helixfold3_small_bfd_link, + params.helixfold3_uniprot_link, + params.helixfold3_pdb_seqres_link, + params.helixfold3_uniref90_link, + params.helixfold3_mgnify_link, + params.helixfold3_pdb_mmcif_link, + params.helixfold3_uniclust30_path, + params.helixfold3_ccd_preprocessed_path, + params.helixfold3_rfam_path, + params.helixfold3_init_models_path, + params.helixfold3_bfd_path, + 
params.helixfold3_small_bfd_path, + params.helixfold3_uniprot_path, + params.helixfold3_pdb_seqres_path, + params.helixfold3_uniref90_path, + params.helixfold3_mgnify_path, + params.helixfold3_pdb_mmcif_path + ) + ch_versions = ch_versions.mix(PREPARE_HELIXFOLD3_DBS.out.versions) + + // + // WORKFLOW: Run nf-core/helixfold3 workflow + // + HELIXFOLD3 ( + ch_samplesheet, + ch_versions, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniclust30, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_ccd_preprocessed, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_rfam, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_bfd, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_small_bfd, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniprot, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_pdb_seqres, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniref90, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_mgnify, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_pdb_mmcif, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models + ) + ch_multiqc = HELIXFOLD3.out.multiqc_report + ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + } + + // // POST PROCESSING: generate visualisation reports // From 52b4c2a9863d359fbfdcfdfd54415e7b9f1748b3 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 12:09:51 +1100 Subject: [PATCH 111/135] Aligned to nf-core dev --- .github/CONTRIBUTING.md | 12 ++-- .github/workflows/ci.yml | 68 ++++++++++++++++--- .github/workflows/download_pipeline.yml | 53 ++++++++++++--- .github/workflows/linting.yml | 23 +++++-- .github/workflows/release-announcements.yml | 2 +- .../workflows/template_version_comment.yml | 46 +++++++++++++ modules/local/run_alphafold2/main.nf | 10 +-- modules/local/run_alphafold2_msa/main.nf | 11 ++- modules/local/run_alphafold2_pred/main.nf | 6 +- nextflow.config | 4 +- 10 files changed, 190 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/template_version_comment.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ad8a7f87..5d64f953 100644 --- a/.github/CONTRIBUTING.md 
+++ b/.github/CONTRIBUTING.md @@ -19,7 +19,7 @@ If you'd like to write some code for nf-core/proteinfold, the standard workflow 1. Check that there isn't already an issue about your idea in the [nf-core/proteinfold issues](https://github.com/nf-core/proteinfold/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/proteinfold repository](https://github.com/nf-core/proteinfold) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). @@ -40,7 +40,7 @@ There are typically two types of tests that run: ### Lint tests `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. -To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. 
This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. @@ -75,7 +75,7 @@ If you wish to contribute a new step, please use the following coding standards: 2. Write the process block (see below). 3. Define the output channel if needed (see below). 4. Add any new parameters to `nextflow.config` with a default (see below). -5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. If applicable, add a new test command in `.github/workflow/ci.yml`. @@ -86,11 +86,11 @@ If you wish to contribute a new step, please use the following coding standards: Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. -Once there, use `nf-core schema build` to add to `nextflow_schema.json`. +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. ### Default processes resource requirements -Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. @@ -103,7 +103,7 @@ Please use the following naming schemes, to make it easy to understand what is g ### Nextflow version bumping -If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . 
[min-nf-version]` ### Images and figures diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47ad6707..161ca5e8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,12 @@ on: pull_request: release: types: [published] + workflow_dispatch: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -17,16 +20,22 @@ concurrency: jobs: test: - name: Run pipeline with test data + name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.profile }} | ${{ matrix.test_profile }})" # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/proteinfold') }}" runs-on: ubuntu-latest strategy: matrix: NXF_VER: - - "23.04.0" + - "24.04.2" - "latest-everything" - parameters: + profile: + - "conda" + - "docker" + - "singularity" + test_name: + - "test" + test_profile: - "test" - "test_alphafold2_split" - "test_alphafold2_download" @@ -34,19 +43,62 @@ jobs: - "test_colabfold_webserver" - "test_colabfold_download" - "test_esmfold" - + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Install Nextflow + - name: Set up Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p 
$NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data ${{ matrix.parameters }} profile + - name: Run pipeline with test data (docker) run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.parameters }},docker --outdir ./results_${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_profile }},docker --outdir ./results + + - name: Run pipeline with test data (singularity) + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_profile }},singularity --outdir ./results + if: "${{ matrix.profile == 'singularity' }}" + + # ## Warning: Pipeline can not be run with conda + # - name: Run pipeline with test data (conda) + # run: | + # nextflow run ${GITHUB_WORKSPACE} -profile test,conda --outdir ./results + # if: "${{ github.base_ref == 'master' }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml index 640ac03c..51f84a59 100644 --- a/.github/workflows/download_pipeline.yml +++ b/.github/workflows/download_pipeline.yml @@ -1,4 +1,4 @@ -name: Test successful pipeline download with 'nf-core download' +name: Test successful pipeline download with 'nf-core pipelines download' # Run the workflow when: # - dispatched manually @@ -8,7 +8,7 @@ on: workflow_dispatch: inputs: testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core download."
+ description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." required: true default: "dev" pull_request: @@ -39,9 +39,11 @@ jobs: with: python-version: "3.12" architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 with: - singularity-version: 3.8.3 + apptainer-version: 1.3.4 - name: Install dependencies run: | @@ -54,33 +56,64 @@ jobs: echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + - name: Download the pipeline env: - NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ + nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ --revision ${{ env.REPO_BRANCH }} \ --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ --compress "none" \ --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ --container-cache-utilisation 'amend' \ - --download-configuration + --download-configuration 'yes' - name: Inspect download run: tree ./${{ env.REPOTITLE_LOWERCASE }} + - name: Count the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + - name: Run the downloaded pipeline (stub) id: stub_run_pipeline continue-on-error: true env: - NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images NXF_SINGULARITY_HOME_MOUNT: true run: 
nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - name: Run the downloaded pipeline (stub run not supported) id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} + if: ${{ steps.stub_run_pipeline.outcome == 'failure' }} env: - NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images NXF_SINGULARITY_HOME_MOUNT: true run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + + - name: Compare container image counts + run: | + if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ env.IMAGE_COUNT_INITIAL }} + final_count=${{ env.IMAGE_COUNT_AFTER }} + difference=$((final_count - initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images + exit 1 + else + echo "The pipeline can be downloaded successfully!" + fi diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1fcafe88..a502573c 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,6 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines.
on: push: @@ -41,17 +41,32 @@ jobs: python-version: "3.12" architecture: "x64" + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Run nf-core lint + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 03ecfcf7..c6ba35df 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -12,7 +12,7 @@ jobs: - name: get topics and convert to hashtags id: get_topics run: | - echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" 
| sed 's/-//g' >> $GITHUB_OUTPUT - uses: rzr/fediverse-action@master with: diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml new file mode 100644 index 00000000..e8aafe44 --- /dev/null +++ b/.github/workflows/template_version_comment.yml @@ -0,0 +1,46 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. + +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. 
+ > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2/main.nf index e05aec6c..6ebd3c1d 100644 --- a/modules/local/run_alphafold2/main.nf +++ b/modules/local/run_alphafold2/main.nf @@ -43,13 +43,16 @@ process RUN_ALPHAFOLD2 { def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " + alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi + if [ -f pdb_seqres/pdb_seqres.txt ] + then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt + fi + if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi python3 /app/alphafold/run_alphafold.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ @@ -61,7 +64,6 @@ process RUN_ALPHAFOLD2 { --template_mmcif_dir=./pdb_mmcif/mmcif_files \ --obsolete_pdbs_path=./pdb_mmcif/obsolete.dat \ --random_seed=53343 \ - --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb 
./"${meta.id}"_alphafold2.pdb diff --git a/modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa/main.nf index 1de3daea..7428eb7f 100644 --- a/modules/local/run_alphafold2_msa/main.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -40,16 +40,14 @@ process RUN_ALPHAFOLD2_MSA { def db_preset = db_preset ? "full_dbs --bfd_database_path=./bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt --uniref30_database_path=./uniref30/UniRef30_2021_03" : "reduced_dbs --small_bfd_database_path=./small_bfd/bfd-first_non_consensus_sequences.fasta" if (alphafold2_model_preset == 'multimer') { - alphafold2_model_preset += " --pdb_seqres_database_path=${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt --uniprot_database_path=${params.alphafold2_db}/uniprot/uniprot.fasta " + alphafold2_model_preset += " --pdb_seqres_database_path=./pdb_seqres/pdb_seqres.txt --uniprot_database_path=./uniprot/uniprot.fasta " } else { - alphafold2_model_preset += " --pdb70_database_path=${params.alphafold2_db}/pdb70/pdb70_from_mmcif_200916/pdb70 " + alphafold2_model_preset += " --pdb70_database_path=./pdb70/pdb70_from_mmcif_200916/pdb70 " } """ - RUNTIME_TMP=\$(mktemp -d) - if [ -f ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt ] - cp ${params.alphafold2_db}/pdb_seqres/pdb_seqres.txt \${RUNTIME_TMP} - then sed -i "/^\\w*0/d" \${RUNTIME_TMP}/pdb_seqres.txt + if [ -f pdb_seqres/pdb_seqres.txt ] + then sed -i "/^\\w*0/d" pdb_seqres/pdb_seqres.txt fi python3 /app/alphafold/run_msa.py \ --fasta_paths=${fasta} \ @@ -64,7 +62,6 @@ process RUN_ALPHAFOLD2_MSA { $args cp "${fasta.baseName}"/features.pkl ./"${fasta.baseName}".features.pkl - rm -rf "\${RUNTIME_TMP}" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred/main.nf index e385faf6..13fb15a7 100644 --- a/modules/local/run_alphafold2_pred/main.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -10,7 +10,7 @@ process 
RUN_ALPHAFOLD2_PRED { error("Local RUN_ALPHAFOLD2_PRED module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "nf-core/proteinfold_alphafold2_split:1.1.1" + container "nf-core/proteinfold_alphafold2_split:dev" input: tuple val(meta), path(fasta) @@ -42,14 +42,14 @@ process RUN_ALPHAFOLD2_PRED { script: def args = task.ext.args ?: '' """ - if [ -d ${params.alphafold2_db}/params/ ]; then ln -r -s ${params.alphafold2_db}/params params; fi + if [ -d params/alphafold_params_* ]; then ln -r -s params/alphafold_params_*/* params/; fi python3 /app/alphafold/run_predict.py \ --fasta_paths=${fasta} \ --model_preset=${alphafold2_model_preset} \ --output_dir=\$PWD \ --data_dir=\$PWD \ + --random_seed=53343 \ --msa_path=${msa} \ - --use_gpu_relax \ $args cp "${fasta.baseName}"/ranked_0.pdb ./"${meta.id}"_alphafold2.pdb diff --git a/nextflow.config b/nextflow.config index 331667c9..a8e07444 100644 --- a/nextflow.config +++ b/nextflow.config @@ -289,10 +289,10 @@ profiles { } // Load nf-core custom profiles from different Institutions -//includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/proteinfold custom profiles from different institutions. -//includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/pipeline/proteinfold.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled From 0b889187b791cb2efd5d3536dd1ef55672357280 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 12:18:41 +1100 Subject: [PATCH 112/135] Started updating documentation --- CHANGELOG.md | 4 ++++ README.md | 30 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b947eccd..1f3ab33d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR ##205](https://github.com/nf-core/proteinfold/pull/205)] - Change input schema from `sequence,fasta` to `id,fasta`. - [[PR #210](https://github.com/nf-core/proteinfold/pull/210)]- Moving post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor module to match nf-core folder structure. - [[#214](https://github.com/nf-core/proteinfold/issues/214)]- Fix colabfold image to run in cpus after [#188](https://github.com/nf-core/proteinfold/issues/188) fix. +- [[PR ##220](https://github.com/nf-core/proteinfold/pull/220)] - Add RoseTTAFold-All-Atom module. +- [[PR ##221](https://github.com/nf-core/proteinfold/pull/221)] - Add HelixFold3 module. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 @@ -105,6 +107,8 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements | | `--esm2_t36_3B_UR50D_contact_regression` | | | `--esmfold_params_path` | | | `--skip_multiqc` | +| | `--rosettafold_all_atom_db` | +| | `--helixfold3_db` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. 
> **NB:** Parameter has been **added** if just the new parameter information is present. diff --git a/README.md b/README.md index 0cbbbea7..6fc86065 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,10 @@ On release, automated continuous integration tests run the pipeline on a full-si v. [ESMFold](https://github.com/facebookresearch/esm) - Regular ESM + vi. [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) - Regular RFAA + + vii. [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) - Regular HF3 + ## Usage > [!NOTE] @@ -55,7 +59,7 @@ nextflow run nf-core/proteinfold \ --outdir ``` -The pipeline takes care of downloading the databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`] or [`--esmfold_db`]. Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you need to provide for each of the databases. +The pipeline takes care of downloading the databases and parameters required by AlphaFold2, Colabfold or ESMFold. In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the corresponding parameter [`--alphafold2_db`], [`--colabfold_db`], [`--esmfold_db`] or [`--rosettafold_all_atom_db`]. Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you need to provide for each of the databases.
- The typical command to run AlphaFold2 mode is shown below: @@ -138,6 +142,30 @@ The pipeline takes care of downloading the databases and parameters required by -profile ``` +- The rosettafold_all_atom mode can be run using the command below: + + ```console + nextflow run nf-core/proteinfold \ + --input samplesheet.csv \ + --outdir \ + --mode rosettafold_all_atom \ + --rosettafold_all_atom_db \ + --use_gpu \ + -profile + ``` + +- The helixfold3 mode can be run using the command below: + + ```console + nextflow run nf-core/proteinfold \ + --input samplesheet.csv \ + --outdir \ + --mode helixfold3 \ + --helixfold3_db \ + --use_gpu \ + -profile + ``` + > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). From b115d0f2f26c2613c8ae092b39a43c9330e2e1e4 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 13:24:25 +1100 Subject: [PATCH 113/135] Added download functionality to prepare_helixfold3_dbs --- conf/dbs.config | 31 ++++-- docs/output.md | 2 + nextflow_schema.json | 2 +- subworkflows/local/prepare_helixfold3_dbs.nf | 110 ++++++++++++++++--- 4 files changed, 118 insertions(+), 27 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 09f1c510..d840db71 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -62,19 +62,28 @@ params { helixfold3_ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' helixfold3_rfam_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta' helixfold3_init_models_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip' + helixfold3_bfd_link = 
'https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz' + helixfold3_small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' + helixfold3_uniprot_sprot_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' + helixfold3_uniprot_trembl_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' + helixfold3_pdb_seqres_link = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' + helixfold3_uniref90_link = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' + helixfold3_mgnify_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' + helixfold3_pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' + helixfold3_pdb_obsolete_link = 'ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat' // Helixfold3 paths - helixfold3_uniclust30_path = "${params.helixfold3_db}/uniclust30/*" - helixfold3_ccd_preprocessed_path = "${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" - helixfold3_rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" - helixfold3_init_models_path = "${params.helixfold3_db}/HelixFold3-240814.pdparams" - helixfold3_bfd_path = "${params.helixfold3_db}/bfd/*" - helixfold3_small_bfd_path = "${params.helixfold3_db}/small_bfd/*" - helixfold3_uniprot_path = "${params.helixfold3_db}/uniprot/*" - helixfold3_pdb_seqres_path = "${params.helixfold3_db}/pdb_seqres/*" - helixfold3_uniref90_path = "${params.helixfold3_db}/uniref90/*" - helixfold3_mgnify_path = "${params.helixfold3_db}/mgnify/*" - helixfold3_pdb_mmcif_path = "${params.helixfold3_db}/pdb_mmcif/*" + helixfold3_uniclust30_path = "${params.helixfold3_db}/uniclust30/*" + helixfold3_ccd_preprocessed_path = 
"${params.helixfold3_db}/ccd_preprocessed_etkdg.pkl.gz" + helixfold3_rfam_path = "${params.helixfold3_db}/Rfam-14.9_rep_seq.fasta" + helixfold3_init_models_path = "${params.helixfold3_db}/HelixFold3-240814.pdparams" + helixfold3_bfd_path = "${params.helixfold3_db}/bfd/*" + helixfold3_small_bfd_path = "${params.helixfold3_db}/small_bfd/*" + helixfold3_uniprot_path = "${params.helixfold3_db}/uniprot/*" + helixfold3_pdb_seqres_path = "${params.helixfold3_db}/pdb_seqres/*" + helixfold3_uniref90_path = "${params.helixfold3_db}/uniref90/*" + helixfold3_mgnify_path = "${params.helixfold3_db}/mgnify/*" + helixfold3_pdb_mmcif_path = "${params.helixfold3_db}/pdb_mmcif/*" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/docs/output.md b/docs/output.md index 9b9a8fb8..f3119a81 100644 --- a/docs/output.md +++ b/docs/output.md @@ -13,6 +13,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts pr - [AlphaFold2](https://github.com/deepmind/alphafold) - [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold - [ESMFold](https://github.com/facebookresearch/esm) +- [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) +- [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) See main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. 
diff --git a/nextflow_schema.json b/nextflow_schema.json index e12a1eda..ae2573bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -488,7 +488,7 @@ "fa_icon": "fas fa-folder-open" }, "colabfold_alphafold2_params_tags": { - "type": "string", + "type": "object", "description": "Dictionary with Alphafold2 parameters tags", "fa_icon": "fas fa-stream" } diff --git a/subworkflows/local/prepare_helixfold3_dbs.nf b/subworkflows/local/prepare_helixfold3_dbs.nf index 2c3076b5..96f57e53 100644 --- a/subworkflows/local/prepare_helixfold3_dbs.nf +++ b/subworkflows/local/prepare_helixfold3_dbs.nf @@ -1,3 +1,26 @@ +// +// Download all the required HelixFold3 databases and parameters +// + +include { + ARIA2_UNCOMPRESS as ARIA2_UNICLUST30 + ARIA2_UNCOMPRESS as ARIA2_CCD_PREPROCESSED + ARIA2_UNCOMPRESS as ARIA2_RFAM + ARIA2_UNCOMPRESS as ARIA2_BFD + ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD + ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT + ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL + ARIA2_UNCOMPRESS as ARIA2_PDB_SEQRES + ARIA2_UNCOMPRESS as ARIA2_UNIREF90 + ARIA2_UNCOMPRESS as ARIA2_MGNIFY + ARIA2_UNCOMPRESS as ARIA2_INIT_MODELS +} from './aria2_uncompress' + +include { ARIA2 as ARIA2_PDB_SEQRES } from '../../modules/nf-core/aria2/main' + +include { COMBINE_UNIPROT } from '../../modules/local/combine_uniprot' +include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' + workflow PREPARE_HELIXFOLD3_DBS { take: @@ -25,20 +48,78 @@ workflow PREPARE_HELIXFOLD3_DBS { helixfold3_pdb_mmcif_path main: - ch_helixfold3_uniclust30 = Channel.value(file(helixfold3_uniclust30_path)) - ch_helixfold3_ccd_preprocessed = Channel.value(file(helixfold3_ccd_preprocessed_path)) - ch_helixfold3_rfam = Channel.value(file(helixfold3_rfam_path)) - ch_helixfold3_bfd = Channel.value(file(helixfold3_bfd_path)) - ch_helixfold3_small_bfd = Channel.value(file(helixfold3_small_bfd_path)) - ch_helixfold3_uniprot = Channel.value(file(helixfold3_uniprot_path)) - ch_helixfold3_pdb_seqres =
Channel.value(file(helixfold3_pdb_seqres_path)) - ch_helixfold3_uniref90 = Channel.value(file(helixfold3_uniref90_path)) - ch_helixfold3_mgnify = Channel.value(file(helixfold3_mgnify_path)) - ch_mmcif_files = file(helixfold3_pdb_mmcif_path, type: 'dir') - ch_mmcif_obsolete = file(helixfold3_pdb_mmcif_path, type: 'file') - ch_helixfold3_pdb_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) - ch_helixfold3_init_models = Channel.value(file(helixfold3_init_models_path)) ch_versions = Channel.empty() + if (params.helixfold3_db) { + ch_helixfold3_uniclust30 = Channel.value(file(helixfold3_uniclust30_path)) + ch_helixfold3_ccd_preprocessed = Channel.value(file(helixfold3_ccd_preprocessed_path)) + ch_helixfold3_rfam = Channel.value(file(helixfold3_rfam_path)) + ch_helixfold3_bfd = Channel.value(file(helixfold3_bfd_path)) + ch_helixfold3_small_bfd = Channel.value(file(helixfold3_small_bfd_path)) + ch_helixfold3_uniprot = Channel.value(file(helixfold3_uniprot_path)) + ch_helixfold3_pdb_seqres = Channel.value(file(helixfold3_pdb_seqres_path)) + ch_helixfold3_uniref90 = Channel.value(file(helixfold3_uniref90_path)) + ch_helixfold3_mgnify = Channel.value(file(helixfold3_mgnify_path)) + ch_mmcif_files = file(helixfold3_pdb_mmcif_path, type: 'dir') + ch_mmcif_obsolete = file(helixfold3_pdb_mmcif_path, type: 'file') + ch_helixfold3_pdb_mmcif = Channel.value(ch_mmcif_files + ch_mmcif_obsolete) + ch_helixfold3_init_models = Channel.value(file(helixfold3_init_models_path)) + } + else { + ARIA2_UNICLUST30(helixfold3_uniclust30_link) + ch_helixfold3_uniclust30 = ARIA2_UNICLUST30.out.db + ch_versions = ch_versions.mix(ARIA2_UNICLUST30.out.versions) + + ARIA2_CCD_PREPROCESSED(helixfold3_ccd_preprocessed_link) + ch_helixfold3_ccd_preprocessed = ARIA2_CCD_PREPROCESSED.out.db + ch_versions = ch_versions.mix(ARIA2_CCD_PREPROCESSED.out.versions) + + ARIA2_RFAM(helixfold3_rfam_link) + ch_helixfold3_rfam = ARIA2_RFAM.out.db + ch_versions = ch_versions.mix(ARIA2_RFAM.out.versions) + +
ARIA2_BFD(helixfold3_bfd_link) + ch_helixfold3_bfd = ARIA2_BFD.out.db + ch_versions = ch_versions.mix(ARIA2_BFD.out.versions) + + ARIA2_SMALL_BFD(helixfold3_small_bfd_link) + ch_helixfold3_small_bfd = ARIA2_SMALL_BFD.out.db + ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) + + ARIA2_PDB_SEQRES(helixfold3_pdb_seqres_link) + ch_helixfold3_pdb_seqres = ARIA2_PDB_SEQRES.out.db + ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) + + ARIA2_UNIREF90(helixfold3_uniref90_link) + ch_helixfold3_uniref90 = ARIA2_UNIREF90.out.db + ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) + + ARIA2_MGNIFY(helixfold3_mgnify_link) + ch_helixfold3_mgnify = ARIA2_MGNIFY.out.db + ch_versions = ch_versions.mix(ARIA2_MGNIFY.out.versions) + + DOWNLOAD_PDBMMCIF(helixfold3_pdb_mmcif_link, helixfold3_pdb_obsolete_link) + ch_helixfold3_pdb_mmcif = DOWNLOAD_PDBMMCIF.out.ch_db + ch_versions = ch_versions.mix(DOWNLOAD_PDBMMCIF.out.versions) + + ARIA2_INIT_MODELS(helixfold3_init_models_link) + ch_helixfold3_init_models = ARIA2_INIT_MODELS.out.db + ch_versions = ch_versions.mix(ARIA2_INIT_MODELS.out.versions) + + ARIA2_UNIPROT_SPROT( + helixfold3_uniprot_sprot_link + ) + ch_versions = ch_versions.mix(ARIA2_UNIPROT_SPROT.out.versions) + ARIA2_UNIPROT_TREMBL( + helixfold3_uniprot_trembl_link + ) + ch_versions = ch_versions.mix(ARIA2_UNIPROT_TREMBL.out.versions) + COMBINE_UNIPROT ( + ARIA2_UNIPROT_SPROT.out.db, + ARIA2_UNIPROT_TREMBL.out.db + ) + ch_helixfold3_uniprot = COMBINE_UNIPROT.out.ch_db + ch_versions = ch_versions.mix(COMBINE_UNIPROT.out.versions) + } emit: helixfold3_uniclust30 = ch_helixfold3_uniclust30 @@ -52,5 +133,5 @@ workflow PREPARE_HELIXFOLD3_DBS { helixfold3_mgnify = ch_helixfold3_mgnify helixfold3_pdb_mmcif = ch_helixfold3_pdb_mmcif helixfold3_init_models = ch_helixfold3_init_models - versions = ch_versions + versions = ch_versions } From a7e326dd8820c52ff49dd80b15acc15b1607baae Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 14:03:31 +1100
Subject: [PATCH 114/135] Added variables for downloading hf3 dbs --- conf/dbs.config | 1 + main.nf | 10 +++-- modules/local/run_helixfold3.nf | 3 +- nextflow.config | 47 +++++++++++--------- nextflow_schema.json | 37 +++++++++++---- subworkflows/local/prepare_helixfold3_dbs.nf | 11 +++-- workflows/helixfold3.nf | 5 ++- 7 files changed, 74 insertions(+), 40 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index d840db71..a8e8ab23 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -84,6 +84,7 @@ params { helixfold3_uniref90_path = "${params.helixfold3_db}/uniref90/*" helixfold3_mgnify_path = "${params.helixfold3_db}/mgnify/*" helixfold3_pdb_mmcif_path = "${params.helixfold3_db}/pdb_mmcif/*" + helixfold3_maxit_src_path = "${params.helixfold3_db}/maxit-v11.200-prod-src/*" // Esmfold links esmfold_3B_v1 = 'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/main.nf b/main.nf index db183cfc..3cc8bc76 100644 --- a/main.nf +++ b/main.nf @@ -256,11 +256,13 @@ workflow NFCORE_PROTEINFOLD { params.helixfold3_init_models_link, params.helixfold3_bfd_link, params.helixfold3_small_bfd_link, - params.helixfold3_uniprot_link, + params.helixfold3_uniprot_sprot_link, + params.helixfold3_uniprot_trembl_link, params.helixfold3_pdb_seqres_link, params.helixfold3_uniref90_link, params.helixfold3_mgnify_link, params.helixfold3_pdb_mmcif_link, + params.helixfold3_pdb_obsolete_link, params.helixfold3_uniclust30_path, params.helixfold3_ccd_preprocessed_path, params.helixfold3_rfam_path, @@ -271,7 +273,8 @@ workflow NFCORE_PROTEINFOLD { params.helixfold3_pdb_seqres_path, params.helixfold3_uniref90_path, params.helixfold3_mgnify_path, - params.helixfold3_pdb_mmcif_path + params.helixfold3_pdb_mmcif_path, + params.helixfold3_maxit_src_path ) ch_versions = ch_versions.mix(PREPARE_HELIXFOLD3_DBS.out.versions) @@ -291,7 +294,8 @@ workflow NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_uniref90, PREPARE_HELIXFOLD3_DBS.out.helixfold3_mgnify, 
PREPARE_HELIXFOLD3_DBS.out.helixfold3_pdb_mmcif, - PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models + PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, + PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) ch_multiqc = HELIXFOLD3.out.multiqc_report ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 35d953f3..b947fe69 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -26,6 +26,7 @@ process RUN_HELIXFOLD3 { path ('mgnify/*') path ('pdb_mmcif/*') path ('init_models/*') + path ('maxit-src/*') output: path ("${fasta.baseName}*") @@ -40,7 +41,7 @@ process RUN_HELIXFOLD3 { script: """ - export MAXIT_SRC="${params.helixfold3_db}/maxit-v11.200-prod-src" + export MAXIT_SRC="./maxit-src" export RCSBROOT="\$MAXIT_SRC" export PATH="\$MAXIT_SRC/bin:/opt/miniforge/envs/helixfold/bin:$PATH" export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" diff --git a/nextflow.config b/nextflow.config index a8e07444..d100dd13 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,30 +89,33 @@ params { helixfold3_db = null // Helixfold3 links - helixfold3_uniclust30_link = null - helixfold3_ccd_preprocessed_link = null - helixfold3_rfam_link = null - helixfold3_init_models_link = null - helixfold3_bfd_link = null - helixfold3_small_bfd_link = null - helixfold3_uniprot_link = null - helixfold3_pdb_seqres_link = null - helixfold3_uniref90_link = null - helixfold3_mgnify_link = null - helixfold3_pdb_mmcif_link = null + helixfold3_uniclust30_link = null + helixfold3_ccd_preprocessed_link = null + helixfold3_rfam_link = null + helixfold3_init_models_link = null + helixfold3_bfd_link = null + helixfold3_small_bfd_link = null + helixfold3_uniprot_sprot_link = null + helixfold3_uniprot_trembl_link = null + helixfold3_pdb_seqres_link = null + helixfold3_uniref90_link = null + helixfold3_mgnify_link = null + helixfold3_pdb_mmcif_link = null + helixfold3_pdb_obsolete_link = null 
// Helixfold3 paths - helixfold3_uniclust30_path = null - helixfold3_ccd_preprocessed_path = null - helixfold3_rfam_path = null - helixfold3_init_models_path = null - helixfold3_bfd_path = null - helixfold3_small_bfd_path = null - helixfold3_uniprot_path = null - helixfold3_pdb_seqres_path = null - helixfold3_uniref90_path = null - helixfold3_mgnify_path = null - helixfold3_pdb_mmcif_path = null + helixfold3_uniclust30_path = null + helixfold3_ccd_preprocessed_path = null + helixfold3_rfam_path = null + helixfold3_init_models_path = null + helixfold3_bfd_path = null + helixfold3_small_bfd_path = null + helixfold3_uniprot_path = null + helixfold3_pdb_seqres_path = null + helixfold3_uniref90_path = null + helixfold3_mgnify_path = null + helixfold3_pdb_mmcif_path = null + helixfold3_maxit_src_path = null // Foldseek params foldseek_search = null diff --git a/nextflow_schema.json b/nextflow_schema.json index ae2573bf..26e2897c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -726,25 +726,28 @@ "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/MSA/Rfam-14.9_rep_seq.fasta" }, "helixfold3_bfd_link": { - "type": "string" + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" }, "helixfold3_small_bfd_link": { - "type": "string" - }, - "helixfold3_uniprot_link": { - "type": "string" + "type": "string", + "default": "https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" }, "helixfold3_pdb_seqres_link": { - "type": "string" + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" }, "helixfold3_uniref90_link": { - "type": "string" + "type": "string", + "default": "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" }, "helixfold3_mgnify_link": { - "type": "string" + "type": "string", + "default": 
"https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" }, "helixfold3_pdb_mmcif_link": { - "type": "string" + "type": "string", + "default": "rsync.rcsb.org::ftp_data/structures/divided/mmCIF/" }, "helixfold3_uniclust30_path": { "type": "string", @@ -785,6 +788,22 @@ "helixfold3_pdb_mmcif_path": { "type": "string", "default": "null/pdb_mmcif/*" + }, + "helixfold3_uniprot_sprot_link": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" + }, + "helixfold3_uniprot_trembl_link": { + "type": "string", + "default": "ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" + }, + "helixfold3_pdb_obsolete_link": { + "type": "string", + "default": "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" + }, + "helixfold3_maxit_src_path": { + "type": "string", + "default": "null/maxit-v11.200-prod-src/*" } } } diff --git a/subworkflows/local/prepare_helixfold3_dbs.nf b/subworkflows/local/prepare_helixfold3_dbs.nf index 96f57e53..077e6a3d 100644 --- a/subworkflows/local/prepare_helixfold3_dbs.nf +++ b/subworkflows/local/prepare_helixfold3_dbs.nf @@ -10,7 +10,6 @@ include { ARIA2_UNCOMPRESS as ARIA2_SMALL_BFD ARIA2_UNCOMPRESS as ARIA2_UNIPROT_SPROT ARIA2_UNCOMPRESS as ARIA2_UNIPROT_TREMBL - ARIA2_UNCOMPRESS as ARIA2_PDB_SEQRES ARIA2_UNCOMPRESS as ARIA2_UNIREF90 ARIA2_UNCOMPRESS as ARIA2_MGNIFY ARIA2_UNCOMPRESS as ARIA2_INIT_MODELS @@ -30,11 +29,13 @@ workflow PREPARE_HELIXFOLD3_DBS { helixfold3_init_models_link helixfold3_bfd_link helixfold3_small_bfd_link - helixfold3_uniprot_link + helixfold3_uniprot_sprot_link + helixfold3_uniprot_trembl_link helixfold3_pdb_seqres_link helixfold3_uniref90_link helixfold3_mgnify_link helixfold3_pdb_mmcif_link + helixfold3_pdb_obsolete_link helixfold3_uniclust30_path helixfold3_ccd_preprocessed_path helixfold3_rfam_path @@ -46,9 +47,12 @@ workflow 
PREPARE_HELIXFOLD3_DBS { helixfold3_uniref90_path helixfold3_mgnify_path helixfold3_pdb_mmcif_path + helixfold3_maxit_src_path main: - if (helixofld3_db) { + helixfold3_maxit_src_path = Channel.value(file(helixfold3_maxit_src_path)) + + if (helixfold3_db) { ch_helixfold3_uniclust30 = Channel.value(file(helixfold3_uniclust30_path)) ch_helixfold3_ccd_preprocessed = Channel.value(file(helixfold3_ccd_preprocessed_path)) ch_helixfold3_rfam = Channel.value(file(helixfold3_rfam_path)) @@ -132,5 +136,6 @@ workflow PREPARE_HELIXFOLD3_DBS { helixfold3_mgnify = ch_helixfold3_mgnify helixfold3_pdb_mmcif = ch_helixfold3_pdb_mmcif helixfold3_init_models = ch_helixfold3_init_models + helixfold3_maxit_src = ch_helixfold3_maxit_src versions = ch_versions } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index 1d29ecea..b848db18 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -50,7 +50,7 @@ workflow HELIXFOLD3 { ch_helixfold3_mgnify ch_helixfold3_pdb_mmcif ch_helixfold3_init_models - + ch_helixfold3_maxit_src main: ch_multiqc_files = Channel.empty() @@ -70,7 +70,8 @@ workflow HELIXFOLD3 { ch_helixfold3_uniref90, ch_helixfold3_mgnify, ch_helixfold3_pdb_mmcif, - ch_helixfold3_init_models + ch_helixfold3_init_models, + ch_helixfold3_maxit_src ) ch_multiqc_rep = RUN_HELIXFOLD3.out.multiqc.collect() ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) From 62861708e517cdcdcce8cb32be7a5dc6ed701080 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 15:47:19 +1100 Subject: [PATCH 115/135] DBs working --- conf/dbs.config | 2 +- main.nf | 1 + subworkflows/local/prepare_helixfold3_dbs.nf | 18 +++++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index a8e8ab23..91421ce3 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -66,7 +66,7 @@ params { helixfold3_small_bfd_link = 'https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz' 
helixfold3_uniprot_sprot_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz' helixfold3_uniprot_trembl_link = 'ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz' - helixfold3_pdb_seqres_link = 'ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt' + helixfold3_pdb_seqres_link = "${params.pdb_seqres_link}" helixfold3_uniref90_link = 'ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz' helixfold3_mgnify_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz' helixfold3_pdb_mmcif_link = 'rsync.rcsb.org::ftp_data/structures/divided/mmCIF/' diff --git a/main.nf b/main.nf index 3cc8bc76..f34333c6 100644 --- a/main.nf +++ b/main.nf @@ -250,6 +250,7 @@ workflow NFCORE_PROTEINFOLD { // SUBWORKFLOW: Prepare helixfold3 DBs // PREPARE_HELIXFOLD3_DBS ( + params.helixfold3_db, params.helixfold3_uniclust30_link, params.helixfold3_ccd_preprocessed_link, params.helixfold3_rfam_link, diff --git a/subworkflows/local/prepare_helixfold3_dbs.nf b/subworkflows/local/prepare_helixfold3_dbs.nf index 077e6a3d..5f66c8af 100644 --- a/subworkflows/local/prepare_helixfold3_dbs.nf +++ b/subworkflows/local/prepare_helixfold3_dbs.nf @@ -23,6 +23,7 @@ include { DOWNLOAD_PDBMMCIF } from '../../modules/local/download_pdbmmcif' workflow PREPARE_HELIXFOLD3_DBS { take: + helixfold3_db helixfold3_uniclust30_link helixfold3_ccd_preprocessed_link helixfold3_rfam_link @@ -50,7 +51,8 @@ workflow PREPARE_HELIXFOLD3_DBS { helixfold3_maxit_src_path main: - helixfold3_maxit_src_path = Channel.value(file(helixfold3_maxit_src_path)) + ch_helixfold3_maxit_src = Channel.value(file(helixfold3_maxit_src_path)) + ch_versions = Channel.empty() if (helixfold3_db) { ch_helixfold3_uniclust30 = Channel.value(file(helixfold3_uniclust30_path)) @@ -88,10 +90,6 @@ workflow PREPARE_HELIXFOLD3_DBS { ch_helixfold3_small_bfd = 
ARIA2_SMALL_BFD.out.db ch_versions = ch_versions.mix(ARIA2_SMALL_BFD.out.versions) - ARIA2_PDB_SEQRES(helixfold3_pdb_seqres_link) - ch_helixfold3_pdb_seqres = ARIA2_PDB_SEQRES.out.db - ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) - ARIA2_UNIREF90(helixfold3_uniref90_link) ch_helixfold3_uniref90 = ARIA2_UNIREF90.out.db ch_versions = ch_versions.mix(ARIA2_UNIREF90.out.versions) @@ -108,6 +106,16 @@ workflow PREPARE_HELIXFOLD3_DBS { ch_helixfold3_init_models = ARIA2_INIT_MODELS.out.db ch_versions = ch_versions.mix(ARIA2_INIT_MODELS.out.versions) + ARIA2_PDB_SEQRES ( + [ + [:], + helixfold3_pdb_seqres_link + ] + ) + ch_helixfold3_pdb_seqres = ARIA2_PDB_SEQRES.out.downloaded_file.map{ it[1] } + ch_versions = ch_versions.mix(ARIA2_PDB_SEQRES.out.versions) + + ARIA2_UNIPROT_SPROT( helixfold3_uniprot_sprot_link ) From f4696d5404851ecb5796343fe462b870da8cebec Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 16:23:50 +1100 Subject: [PATCH 116/135] Fixed maxit-src --- conf/dbs.config | 4 ++-- modules/local/run_helixfold3.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 91421ce3..70597223 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -49,7 +49,7 @@ params { ] // RoseTTAFold links - uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' + uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' // RoseTTAFold paths @@ -84,7 +84,7 @@ params { helixfold3_uniref90_path = "${params.helixfold3_db}/uniref90/*" helixfold3_mgnify_path = "${params.helixfold3_db}/mgnify/*" helixfold3_pdb_mmcif_path = "${params.helixfold3_db}/pdb_mmcif/*" - helixfold3_maxit_src_path = "${params.helixfold3_db}/maxit-v11.200-prod-src/*" + helixfold3_maxit_src_path = "${params.helixfold3_db}/maxit-v11.200-prod-src" // Esmfold links esmfold_3B_v1 = 
'https://dl.fbaipublicfiles.com/fair-esm/models/esmfold_3B_v1.pt' diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index b947fe69..f75629a0 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -26,7 +26,7 @@ process RUN_HELIXFOLD3 { path ('mgnify/*') path ('pdb_mmcif/*') path ('init_models/*') - path ('maxit-src/*') + path ('maxit_src') output: path ("${fasta.baseName}*") @@ -41,7 +41,7 @@ process RUN_HELIXFOLD3 { script: """ - export MAXIT_SRC="./maxit-src" + export MAXIT_SRC="./maxit_src" export RCSBROOT="\$MAXIT_SRC" export PATH="\$MAXIT_SRC/bin:/opt/miniforge/envs/helixfold/bin:$PATH" export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" From ddc3d8000ceba87a0e053aa6bb9c00564cf6253c Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 16:44:46 +1100 Subject: [PATCH 117/135] Updated files --- .github/CONTRIBUTING.md | 6 +- .github/workflows/ci.yml | 2 + CHANGELOG.md | 2 +- conf/test_helixfold3.config | 37 ++++++++++++ conf/test_rosettafold_all_atom.config | 37 ++++++++++++ conf/unsw_katana.config | 70 ----------------------- dockerfiles/environment.yaml | 16 ++++++ dockerfiles/helixfold3.def | 48 ++++++++++++++++ docs/output.md | 1 - modules/local/run_rosettafold_all_atom.nf | 2 +- 10 files changed, 145 insertions(+), 76 deletions(-) create mode 100644 conf/test_helixfold3.config create mode 100644 conf/test_rosettafold_all_atom.config delete mode 100644 conf/unsw_katana.config create mode 100644 dockerfiles/environment.yaml create mode 100644 dockerfiles/helixfold3.def diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5d64f953..3e577c29 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -29,7 +29,7 @@ If you're not used to this workflow with git, you can start with some [docs from You have the option to test your changes locally by running the pipeline. 
For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: ```bash -nextflow run . --profile debug,test,docker --outdir +nextflow run . -profile debug,test,docker --outdir ``` When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. @@ -78,8 +78,8 @@ If you wish to contribute a new step, please use the following coding standards: 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. 
### Default values diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 161ca5e8..02a67707 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,8 @@ jobs: - "test_colabfold_webserver" - "test_colabfold_download" - "test_esmfold" + - "test_rosettafold_all_atom" + - "test_helixfold3" isMaster: - ${{ github.base_ref == 'master' }} # Exclude conda and singularity on dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f3ab33d..7061254a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[PR #210](https://github.com/nf-core/proteinfold/pull/210)]- Moving post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor module to match nf-core folder structure. - [[#214](https://github.com/nf-core/proteinfold/issues/214)]- Fix colabfold image to run in cpus after [#188](https://github.com/nf-core/proteinfold/issues/188) fix. - [[PR ##220](https://github.com/nf-core/proteinfold/pull/220)] - Add RoseTTAFold-All-Atom module. -- [[PR ##221](https://github.com/nf-core/proteinfold/pull/221)] - Add HelixFold3 module. +- [[PR ##223](https://github.com/nf-core/proteinfold/pull/223)] - Add HelixFold3 module. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 diff --git a/conf/test_helixfold3.config b/conf/test_helixfold3.config new file mode 100644 index 00000000..d08468b8 --- /dev/null +++ b/conf/test_helixfold3.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_helixfold3, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +// Limit resources so that this can run on GitHub Actions +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data to test helixfold3 + mode = 'helixfold3' + helixfold3_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'RUN_HELIXFOLD3' { + container = '/srv/scratch/sbf-pipelines/proteinfold/singularity/helixfold3.sif' + } +} + diff --git a/conf/test_rosettafold_all_atom.config b/conf/test_rosettafold_all_atom.config new file mode 100644 index 00000000..258938ca --- /dev/null +++ b/conf/test_rosettafold_all_atom.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ Use as follows: + nextflow run nf-core/proteinfold -profile test_rosettafold_all_atom, --outdir +---------------------------------------------------------------------------------------- +*/ + +stubRun = true + +// Limit resources so that this can run on GitHub Actions +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data to test rosettafold_all_atom + mode = 'rosettafold_all_atom' + rosettafold_all_atom_db = "${projectDir}/assets/dummy_db_dir" + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' +} + +process { + withName: 'RUN_ROSETTAFOLD_ALL_ATOM' { + container = '/srv/scratch/sbf-pipelines/proteinfold/singularity/rosettafold_all_atom.sif' + } +} + diff --git a/conf/unsw_katana.config b/conf/unsw_katana.config deleted file mode 100644 index 825610ea..00000000 --- a/conf/unsw_katana.config +++ /dev/null @@ -1,70 +0,0 @@ -// UNSW Katana nf-core configuration profile - - -params { - config_profile_description = 'UNSW Katana HPC profile for nf-core/proteinfold' - config_profile_contact = '@jscgh' - config_profile_url = 'https://docs.restech.unsw.edu.au/' -} - -process { -// executor = 'pbspro' - queue = 'submission' - - resourceLimits = [ - memory: 500.GB, - cpus: 32, - time: 200.h - ] - - // TODO nf-core: Check the defaults for all processes - cpus = { 1 * task.attempt } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } - - errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - withLabel:process_single { - cpus = { 1 } - memory = { 6.GB * task.attempt } - time = { 4.h * task.attempt } - } - withLabel:process_low { - cpus = { 2 * task.attempt } - memory = { 12.GB * task.attempt } - time = { 4.h * task.attempt } - } - withLabel:process_medium { - cpus = { 6 * task.attempt } - memory = { 100.GB * task.attempt } - time = { 8.h * task.attempt } - } - withLabel:process_high { - cpus = { 8 * task.attempt } - memory = { 125.GB * task.attempt } - time = { 16.h * task.attempt } - } - withLabel:process_long { - time = { 24.h * task.attempt } - } - withLabel:process_high_memory { - memory = { 250.GB * task.attempt } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withLabel:error_retry { - errorStrategy = 'retry' - maxRetries = 1 - } - withLabel:gpu_compute { - accelerator = 1 - clusterOptions = { "-l host=k095 -l ngpus=1 -l ncpus=${task.cpus} -l mem=${task.memory.toMega()}mb" } - } - - withName: 'RUN_HELIXFOLD3' { - containerOptions = "--bind \${TMPDIR}:/tmp,\${PWD} --env CUDA_VISIBLE_DEVICES=0" - } -} diff --git a/dockerfiles/environment.yaml b/dockerfiles/environment.yaml new file mode 100644 index 00000000..d78985e7 --- /dev/null +++ b/dockerfiles/environment.yaml @@ -0,0 +1,16 @@ +name: helixfold +channels: + - conda-forge + - bioconda + - nvidia + - biocore + - defaults + +dependencies: + - python=3.9 + - cuda-toolkit=12.0 + - cudnn=8.4.0 + - nccl=2.14 + - libgcc + - libgomp + - pip diff --git a/dockerfiles/helixfold3.def b/dockerfiles/helixfold3.def new file mode 100644 index 00000000..5e0eb7db --- /dev/null +++ b/dockerfiles/helixfold3.def @@ -0,0 +1,48 @@ +Bootstrap: docker +From: nvidia/cuda:12.6.0-cudnn-devel-ubuntu24.04 + +%labels + Author j.caley@unsw.edu.au + Version 0.2.1 + +%files + environment.yaml . 
+ +%post + apt update && DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y wget git + + wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" + bash Miniforge3-Linux-x86_64.sh -b -p /opt/miniforge + rm Miniforge3-Linux-x86_64.sh + export PATH="/opt/miniforge/bin:$PATH" + + git clone --single-branch --branch dev --depth 1 --no-checkout https://github.com/PaddlePaddle/PaddleHelix.git app/helixfold3 + cd app/helixfold3 + git sparse-checkout init --cone + git sparse-checkout set apps/protein_folding/helixfold3 + git checkout dev + mv apps/protein_folding/helixfold3/* . + rm -rf apps + mv /environment.yaml . + mamba env create -f environment.yaml + + conda install -y -c bioconda aria2 hmmer==3.3.2 kalign2==2.04 hhsuite==3.3.0 -n helixfold + conda install -y -c conda-forge openbabel -n helixfold + + mamba run -n helixfold \ + 'python3 -m pip install paddlepaddle-gpu==2.6.1 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \ + python3 -m pip install -r requirements.txt' + + apt autoremove -y && apt remove --purge -y wget git && apt clean -y + rm -rf /var/lib/apt/lists/* /root/.cache *.tar.gz + mamba clean --all --force-pkgs-dirs -y + +%environment + export PATH="/app/helixfold3:/opt/miniforge/bin:$PATH" + export PYTHONPATH="/app/helixfold3:$PYTHONPATH" + export PYTHON_BIN="/opt/miniforge/envs/helixfold/bin/python3.9" + export ENV_BIN="/opt/miniforge/envs/helixfold/bin" + export OBABEL_BIN="/opt/miniforge/envs/helixfold/bin" + +%runscript + mamba run --name helixfold "$@" diff --git a/docs/output.md b/docs/output.md index f3119a81..3a5c1787 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,7 +14,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts pr - [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold - [ESMFold](https://github.com/facebookresearch/esm) - 
[RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) -- [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) See main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf index 908d667a..38e04d8d 100644 --- a/modules/local/run_rosettafold_all_atom.nf +++ b/modules/local/run_rosettafold_all_atom.nf @@ -11,7 +11,7 @@ process RUN_ROSETTAFOLD_ALL_ATOM { error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. Please use Docker / Singularity / Podman instead.") } - container "/srv/scratch/sbf-pipelines/proteinfold/singularity/RoseTTAFold_All_Atom.sif" + container "/srv/scratch/sbf-pipelines/proteinfold/singularity/rosettafold_all_atom.sif" input: tuple val(meta), path(fasta) From 804a9bb1208fc62bdf34729af5be9eebf9dbb989 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 16:57:13 +1100 Subject: [PATCH 118/135] Aligning files --- .gitignore | 1 - dockerfiles/rosettafold_all_atom.def | 43 ++++++++++++++++++++++++++++ docs/output.md | 1 + 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 dockerfiles/rosettafold_all_atom.def diff --git a/.gitignore b/.gitignore index 8070b718..a42ce016 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,3 @@ testing/ testing* *.pyc null/ -.nfs* diff --git a/dockerfiles/rosettafold_all_atom.def b/dockerfiles/rosettafold_all_atom.def new file mode 100644 index 00000000..f22775cc --- /dev/null +++ b/dockerfiles/rosettafold_all_atom.def @@ -0,0 +1,43 @@ +Bootstrap: docker +From: nvidia/cuda:12.6.0-cudnn-devel-ubuntu24.04 + +%labels + Author j.caley@unsw.edu.au + Version 0.2.3 + +%post + apt update && DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y wget git + + wget 
"https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" + bash Miniforge3-Linux-x86_64.sh -b -p /opt/miniforge + rm Miniforge3-Linux-x86_64.sh + export PATH="/opt/miniforge/bin:$PATH" + mamba init + + git clone --single-branch --depth 1 https://github.com/Australian-Structural-Biology-Computing/RoseTTAFold-All-Atom.git /app/RoseTTAFold-All-Atom + cd /app/RoseTTAFold-All-Atom + mamba env create -f environment.yaml + + mamba run -n RFAA \ + 'python rf2aa/SE3Transformer/setup.py install && \ + bash install_dependencies.sh' + + wget https://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz + mkdir -p blast-2.2.26 + tar -xf blast-2.2.26-x64-linux.tar.gz -C blast-2.2.26 + cp -r blast-2.2.26/blast-2.2.26/ blast-2.2.26_bk + rm -r blast-2.2.26 + mv blast-2.2.26_bk/ blast-2.2.26 + + apt autoremove -y && apt remove --purge -y wget git && apt clean -y + rm -rf /var/lib/apt/lists/* /root/.cache *.tar.gz + mamba clean --all --force-pkgs-dirs -y + chmod 755 input_prep/make_ss.sh + +%environment + export PYTHONPATH="/app/RoseTTAFold-All-Atom:$PYTHONPATH" + export PATH="/opt/miniforge/bin:/app/RoseTTAFold-All-Atom:$PATH" + export DGLBACKEND="pytorch" + +%runscript + mamba run --name RFAA python -m rf2aa.run_inference --config-name "$@" diff --git a/docs/output.md b/docs/output.md index 3a5c1787..f3119a81 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,6 +14,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and predicts pr - [ColabFold](https://github.com/sokrypton/ColabFold) - MMseqs2 (API server or local search) followed by ColabFold - [ESMFold](https://github.com/facebookresearch/esm) - [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) +- [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) See main [README.md](https://github.com/nf-core/proteinfold/blob/master/README.md) for a condensed 
overview of the steps in the pipeline, and the bioinformatics tools used at each step. From 95c6fcfbfdd057d48366300e8203865d807d6fc7 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 21 Nov 2024 17:26:49 +1100 Subject: [PATCH 119/135] schema_input.json added backwards compatibility to sequence columns --- assets/schema_input.json | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index b1f8b2ef..2093c68f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,6 +7,12 @@ "items": { "type": "object", "properties": { + "sequence": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sequence name must be provided and cannot contain spaces", + "meta": ["sequence"] + }, "id": { "type": "string", "pattern": "^\\S+$", @@ -21,6 +27,10 @@ "errorMessage": "Fasta, yaml or json file must be provided, cannot contain spaces and must have extension '.fa', '.fasta', '.yaml', '.yml', or '.json'" } }, - "required": ["id", "fasta"] + "required": ["fasta"], + "anyOf": [ + { "required": ["sequence"] }, + { "required": ["id"] } + ] } } From 1bbd4e68dd051a84c2e749c07e7242328a7b431e Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 22 Nov 2024 13:39:54 +1100 Subject: [PATCH 120/135] Test profiles added to nextflow.config --- nextflow.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index d100dd13..63362e67 100644 --- a/nextflow.config +++ b/nextflow.config @@ -272,7 +272,6 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - unsw_katana { includeConfig 'conf/unsw_katana.config' } test { includeConfig 'conf/test.config' } test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } @@ -289,6 +288,8 @@ profiles { test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } test_full_esmfold 
{ includeConfig 'conf/test_full_esmfold.config' } test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } + test_rosettafold_all_atom { includeConfig 'conf/test_rosettafold_all_atom.config' } + test_helixfold3 { includeConfig 'conf/test_helixfold3.config' } } // Load nf-core custom profiles from different Institutions From f6308fe0601f11c62f0e7e33fab441c2b5248f0c Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 Nov 2024 11:18:46 +1100 Subject: [PATCH 121/135] Aligned with dev --- assets/samplesheet.csv | 2 +- conf/modules_alphafold2.config | 32 ++++-- conf/modules_colabfold.config | 32 ++++-- conf/modules_esmfold.config | 10 +- conf/modules_rosettafold_all_atom.config | 15 --- conf/test_rosettafold_all_atom.config | 37 ------ dockerfiles/rosettafold_all_atom.def | 43 ------- modules/local/run_rosettafold_all_atom.nf | 63 ---------- .../local/prepare_rosettafold_all_atom_dbs.nf | 24 ---- workflows/rosettafold_all_atom.nf | 108 ------------------ 10 files changed, 59 insertions(+), 307 deletions(-) delete mode 100644 conf/modules_rosettafold_all_atom.config delete mode 100644 conf/test_rosettafold_all_atom.config delete mode 100644 dockerfiles/rosettafold_all_atom.def delete mode 100644 modules/local/run_rosettafold_all_atom.nf delete mode 100644 subworkflows/local/prepare_rosettafold_all_atom_dbs.nf delete mode 100644 workflows/rosettafold_all_atom.nf diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 467fdcf0..b458d604 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sequence,fasta +id,fasta T1024,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1024.fasta T1026,https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/T1026.fasta diff --git a/conf/modules_alphafold2.config b/conf/modules_alphafold2.config index 33b04c38..a12105ab 100644 --- a/conf/modules_alphafold2.config +++ b/conf/modules_alphafold2.config @@ -40,9 
+40,18 @@ if (params.alphafold2_mode == 'standard') { params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: '*.*' + ], + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*_alphafold2.pdb' + ] ] } } @@ -54,7 +63,7 @@ if (params.alphafold2_mode == 'split_msa_prediction') { withName: 'RUN_ALPHAFOLD2_MSA' { ext.args = params.max_template_date ? "--max_template_date ${params.max_template_date}" : '' publishDir = [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, + path: { "${params.outdir}/alphafold2_${params.alphafold2_mode}" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -64,9 +73,18 @@ if (params.alphafold2_mode == 'split_msa_prediction') { if(params.use_gpu) { accelerator = 1 } ext.args = params.use_gpu ? '--use_gpu_relax=true' : '--use_gpu_relax=false' publishDir = [ - path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + pattern: '*.*' + ], + [ + path: { "${params.outdir}/alphafold2/${params.alphafold2_mode}/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*_alphafold2.pdb' + ] ] } } diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 2efcfa01..c37214d3 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -30,10 +30,18 @@ if (params.colabfold_server == 'webserver') { params.host_url ? "--host-url ${params.host_url}" : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: '*.*' + [ + path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: '*.*' + ], + [ + path: { "${params.outdir}/colabfold/${params.colabfold_server}/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*_relaxed_rank_001*.pdb' + ] ] } } @@ -67,10 +75,18 @@ if (params.colabfold_server == 'local') { params.use_templates ? '--templates' : '' ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: '*.*' + [ + path: { "${params.outdir}/colabfold/${params.colabfold_server}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + pattern: '*.*' + ], + [ + path: { "${params.outdir}/colabfold/${params.colabfold_server}/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*_relaxed_rank_001*.pdb' + ], ] } } diff --git a/conf/modules_esmfold.config b/conf/modules_esmfold.config index d8356924..3468718f 100644 --- a/conf/modules_esmfold.config +++ b/conf/modules_esmfold.config @@ -14,11 +14,19 @@ process { withName: 'RUN_ESMFOLD' { ext.args = {params.use_gpu ? '' : '--cpu-only'} publishDir = [ - path: { "${params.outdir}/esmfold" }, + [ + path: { "${params.outdir}/esmfold/default" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, pattern: '*.*' + ], + [ + path: { "${params.outdir}/esmfold/default/top_ranked_structures" }, + mode: 'copy', + saveAs: { "${meta.id}.pdb" }, + pattern: '*.pdb' ] + ] } withName: 'NFCORE_PROTEINFOLD:ESMFOLD:MULTIQC' { diff --git a/conf/modules_rosettafold_all_atom.config b/conf/modules_rosettafold_all_atom.config deleted file mode 100644 index 0e871755..00000000 --- a/conf/modules_rosettafold_all_atom.config +++ /dev/null @@ -1,15 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
----------------------------------------------------------------------------------------- -*/ - -// -// General configuration options -// diff --git a/conf/test_rosettafold_all_atom.config b/conf/test_rosettafold_all_atom.config deleted file mode 100644 index 258938ca..00000000 --- a/conf/test_rosettafold_all_atom.config +++ /dev/null @@ -1,37 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - Use as follows: - nextflow run nf-core/proteinfold -profile test_rosettafold_all_atom, --outdir ----------------------------------------------------------------------------------------- -*/ - -stubRun = true - -// Limit resources so that this can run on GitHub Actions -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data to test rosettafold_all_atom - mode = 'rosettafold_all_atom' - rosettafold_all_atom_db = "${projectDir}/assets/dummy_db_dir" - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.0/samplesheet.csv' -} - -process { - withName: 'RUN_ROSETTAFOLD_ALL_ATOM' { - container = '/srv/scratch/sbf-pipelines/proteinfold/singularity/rosettafold_all_atom.sif' - } -} - diff --git a/dockerfiles/rosettafold_all_atom.def b/dockerfiles/rosettafold_all_atom.def deleted file mode 100644 index f22775cc..00000000 --- a/dockerfiles/rosettafold_all_atom.def +++ /dev/null @@ -1,43 +0,0 @@ -Bootstrap: docker -From: nvidia/cuda:12.6.0-cudnn-devel-ubuntu24.04 - -%labels - Author j.caley@unsw.edu.au - Version 0.2.3 - -%post - apt update && DEBIAN_FRONTEND=noninteractive apt install --no-install-recommends -y 
wget git - - wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" - bash Miniforge3-Linux-x86_64.sh -b -p /opt/miniforge - rm Miniforge3-Linux-x86_64.sh - export PATH="/opt/miniforge/bin:$PATH" - mamba init - - git clone --single-branch --depth 1 https://github.com/Australian-Structural-Biology-Computing/RoseTTAFold-All-Atom.git /app/RoseTTAFold-All-Atom - cd /app/RoseTTAFold-All-Atom - mamba env create -f environment.yaml - - mamba run -n RFAA \ - 'python rf2aa/SE3Transformer/setup.py install && \ - bash install_dependencies.sh' - - wget https://ftp.ncbi.nlm.nih.gov/blast/executables/legacy.NOTSUPPORTED/2.2.26/blast-2.2.26-x64-linux.tar.gz - mkdir -p blast-2.2.26 - tar -xf blast-2.2.26-x64-linux.tar.gz -C blast-2.2.26 - cp -r blast-2.2.26/blast-2.2.26/ blast-2.2.26_bk - rm -r blast-2.2.26 - mv blast-2.2.26_bk/ blast-2.2.26 - - apt autoremove -y && apt remove --purge -y wget git && apt clean -y - rm -rf /var/lib/apt/lists/* /root/.cache *.tar.gz - mamba clean --all --force-pkgs-dirs -y - chmod 755 input_prep/make_ss.sh - -%environment - export PYTHONPATH="/app/RoseTTAFold-All-Atom:$PYTHONPATH" - export PATH="/opt/miniforge/bin:/app/RoseTTAFold-All-Atom:$PATH" - export DGLBACKEND="pytorch" - -%runscript - mamba run --name RFAA python -m rf2aa.run_inference --config-name "$@" diff --git a/modules/local/run_rosettafold_all_atom.nf b/modules/local/run_rosettafold_all_atom.nf deleted file mode 100644 index 38e04d8d..00000000 --- a/modules/local/run_rosettafold_all_atom.nf +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Run RoseTTAFold_All_Atom - */ -process RUN_ROSETTAFOLD_ALL_ATOM { - tag "$meta.id" - label 'gpu_compute' - label 'process_medium' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local RUN_ROSETTAFOLD_ALL_ATOM module does not support Conda. 
Please use Docker / Singularity / Podman instead.") - } - - container "/srv/scratch/sbf-pipelines/proteinfold/singularity/rosettafold_all_atom.sif" - - input: - tuple val(meta), path(fasta) - path ('bfd/*') - path ('UniRef30_2020_06/*') - path ('pdb100_2021Mar03/*') - - output: - path ("${fasta.baseName}*") - tuple val(meta), path ("${meta.id}_rosettafold_all_atom.pdb") , emit: main_pdb - tuple val(meta), path ("*pdb") , emit: pdb - tuple val(meta), path ("*_mqc.tsv") , emit: multiqc - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - ln -s /app/RoseTTAFold-All-Atom/* . - - mamba run --name RFAA python -m rf2aa.run_inference \ - loader_params.MAXCYCLE=1 \ - checkpoint_path="/srv/scratch/sbf/rfaa/RFAA_paper_weights.pt" \ - --config-dir /app/RoseTTAFold-All-Atom/rf2aa/config/inference \ - --config-name "${fasta}" - - cp "${fasta.baseName}".pdb ./"${meta.id}"_rosettafold_all_atom.pdb - awk '{print \$6"\\t"\$11}' "${meta.id}"_rosettafold_all_atom.pdb | uniq > plddt.tsv - echo -e Positions"\\t""${meta.id}"_rosettafold_all_atom.pdb > header.tsv - cat header.tsv plddt.tsv > "${meta.id}"_plddt_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS - """ - - stub: - """ - touch ./"${meta.id}"_rosettafold_all_atom.pdb - touch ./"${meta.id}"_mqc.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python3 --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf b/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf deleted file mode 100644 index e03de5c1..00000000 --- a/subworkflows/local/prepare_rosettafold_all_atom_dbs.nf +++ /dev/null @@ -1,24 +0,0 @@ -// -// TBD: Download all the required Rosettafold-All-Atom databases and parameters -// - - -workflow PREPARE_ROSETTAFOLD_ALL_ATOM_DBS { - - take: - bfd_rosettafold_all_atom_path // directory: 
/path/to/bfd/ - uniref30_rosettafold_all_atom_path // directory: /path/to/uniref30/rosettafold_all_atom/ - pdb100_path - - main: - ch_bfd = Channel.value(file(bfd_rosettafold_all_atom_path)) - ch_uniref30 = Channel.value(file(uniref30_rosettafold_all_atom_path)) - ch_pdb100 = Channel.value(file(pdb100_path)) - ch_versions = Channel.empty() - - emit: - bfd = ch_bfd - uniref30 = ch_uniref30 - pdb100 = ch_pdb100 - versions = ch_versions -} diff --git a/workflows/rosettafold_all_atom.nf b/workflows/rosettafold_all_atom.nf deleted file mode 100644 index 4861e35d..00000000 --- a/workflows/rosettafold_all_atom.nf +++ /dev/null @@ -1,108 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Loaded from modules/local/ -// -include { RUN_ROSETTAFOLD_ALL_ATOM } from '../modules/local/run_rosettafold_all_atom' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow ROSETTAFOLD_ALL_ATOM { - - 
take: - ch_samplesheet - ch_versions // channel: [ path(versions.yml) ] - ch_bfd // channel: path(bfd) - ch_uniref30 // channel: path(uniref30) - ch_pdb100 - - main: - ch_multiqc_files = Channel.empty() - - // - // SUBWORKFLOW: Run Rosettafold_All_Atom - // - RUN_ROSETTAFOLD_ALL_ATOM ( - ch_samplesheet, - ch_bfd, - ch_uniref30, - ch_pdb100 - ) - ch_multiqc_rep = RUN_ROSETTAFOLD_ALL_ATOM.out.multiqc.collect() - ch_versions = ch_versions.mix(RUN_ROSETTAFOLD_ALL_ATOM.out.versions) - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } - - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - if (!params.skip_multiqc) { - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) - ch_multiqc_report = MULTIQC.out.report.toList() - } - - emit: - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ From 0a4b030de77e45ded62360340b1a69beb777f1e1 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 Nov 2024 11:40:22 +1100 Subject: [PATCH 122/135] Adding multiqc to hf3 module --- main.nf | 11 ++-- subworkflows/local/post_processing.nf | 5 ++ workflows/helixfold3.nf | 88 +++++++++++---------------- 3 files changed, 49 insertions(+), 55 deletions(-) diff --git a/main.nf b/main.nf index f34333c6..b33b9dde 100644 --- a/main.nf +++ b/main.nf @@ -73,6 +73,7 @@ workflow NFCORE_PROTEINFOLD { ch_alphafold_top_ranked_pdb = Channel.empty() ch_colabfold_top_ranked_pdb = Channel.empty() ch_esmfold_top_ranked_pdb = Channel.empty() + ch_helixfold3_top_ranked_pdb = Channel.empty() ch_multiqc = Channel.empty() ch_versions = Channel.empty() ch_report_input = Channel.empty() @@ -298,11 +299,12 @@ workflow 
NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) - ch_multiqc = HELIXFOLD3.out.multiqc_report - ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + ch_helixfold3_top_ranked_pdb = helixfold3.out.top_ranked_pdb + ch_multiqc = ch_multiqc.mix(helixfold3.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(helixfold3.out.versions) + ch_report_input = ch_report_input.mix(helixfold3.out.pdb_msa) } - // // POST PROCESSING: generate visualisation reports // @@ -345,7 +347,8 @@ workflow NFCORE_PROTEINFOLD { ch_multiqc_methods_description, ch_alphafold_top_ranked_pdb, ch_colabfold_top_ranked_pdb, - ch_esmfold_top_ranked_pdb + ch_esmfold_top_ranked_pdb, + ch_helixfold3_top_ranked_pdb ) emit: diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 45d2f0b6..2b9b4b63 100644 --- a/subworkflows/local/post_processing.nf +++ b/subworkflows/local/post_processing.nf @@ -38,6 +38,7 @@ workflow POST_PROCESSING { ch_alphafold2_top_ranked_pdb ch_colabfold_top_ranked_pdb ch_esmfold_top_ranked_pdb + ch_helixfold3_top_ranked_pdb main: ch_comparison_report_files = Channel.empty() @@ -67,6 +68,10 @@ workflow POST_PROCESSING { ch_esmfold_top_ranked_pdb ) + ch_comparison_report_files = ch_comparison_report_files.mix( + ch_helixfold3_top_ranked_pdb + ) + ch_comparison_report_files .groupTuple(by: [0], size: requested_modes_size) .set { ch_comparison_report_input } diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index b848db18..c24fc622 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -15,19 +15,6 @@ include { RUN_HELIXFOLD3 } from '../modules/local/run_helixfold3' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// -include { MULTIQC } from '../modules/nf-core/multiqc/main' - -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules 
-// -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_proteinfold_pipeline' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -53,8 +40,12 @@ workflow HELIXFOLD3 { ch_helixfold3_maxit_src main: - ch_multiqc_files = Channel.empty() - + ch_multiqc_files = Channel.empty() + ch_pdb = Channel.empty() + ch_top_ranked_pdb = Channel.empty() + ch_msa = Channel.empty() + ch_multiqc_report = Channel.empty() + // // SUBWORKFLOW: Run helixfold3 // @@ -73,48 +64,43 @@ workflow HELIXFOLD3 { ch_helixfold3_init_models, ch_helixfold3_maxit_src ) - ch_multiqc_rep = RUN_HELIXFOLD3.out.multiqc.collect() - ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_proteinfold_software_mqc_versions.yml', sort: true, newLine: true) - .set { ch_collated_versions } + RUN_HELIXFOLD3_ALL_ATOM + .out + .multiqc + .map { it[1] } + .toSortedList() + .map { [ [ "model": "helixfold3" ], it.flatten() ] } + .set { ch_multiqc_report } - // - // MODULE: MultiQC - // - ch_multiqc_report = Channel.empty() - if (!params.skip_multiqc) { - ch_multiqc_report = Channel.empty() - ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? 
Channel.fromPath( params.multiqc_logo ) : Channel.empty() - summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3_ALL_ATOM.out.pdb) + ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_HELIXFOLD3_ALL_ATOM.out.top_ranked_pdb) + ch_versions = ch_versions.mix(RUN_HELIXFOLD3_ALL_ATOM.out.versions) + + RUN_HELIXFOLD3_ALL_ATOM + .out + .pdb + .combine(ch_dummy_file) + .map { + it[0]["model"] = "helixfold3" + it + } + .set { ch_pdb_msa } - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_rep) + ch_top_ranked_pdb + .map { [ it[0]["id"], it[0], it[1] ] } + .set { ch_top_ranked_pdb } - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) - ch_multiqc_report = MULTIQC.out.report.toList() - } + ch_pdb + .join(ch_msa) + .map { + it[0]["model"] = "helixfold3" + it + } + .set { ch_pdb_msa } emit: + top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } From 30ffcd673da174a59599c2df67d4c02ae9c71643 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 Nov 
2024 13:26:46 +1100 Subject: [PATCH 123/135] Removed leftover RFAA files and variables --- conf/dbs.config | 9 --------- main.nf | 33 --------------------------------- nextflow.config | 12 +----------- nextflow_schema.json | 27 ++------------------------- 4 files changed, 3 insertions(+), 78 deletions(-) diff --git a/conf/dbs.config b/conf/dbs.config index 70597223..eded8c0c 100644 --- a/conf/dbs.config +++ b/conf/dbs.config @@ -48,15 +48,6 @@ params { "alphafold2_ptm" : "alphafold_params_2021-07-14" ] - // RoseTTAFold links - uniref30 = 'http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz' - pdb100 = 'https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz' - - // RoseTTAFold paths - uniref30_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/uniref30/UniRef30_2020_06/*" - pdb100_path = "${params.rosettafold_all_atom_db}/pdb100_2021Mar03/*" - bfd_rosettafold_all_atom_path = "${params.rosettafold_all_atom_db}/bfd/*" - // Helixfold3 links helixfold3_uniclust30_link = 'https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz' helixfold3_ccd_preprocessed_link = 'https://paddlehelix.bd.bcebos.com/HelixFold3/CCD/ccd_preprocessed_etkdg.pkl.gz' diff --git a/main.nf b/main.nf index b33b9dde..017aa3a0 100644 --- a/main.nf +++ b/main.nf @@ -27,10 +27,6 @@ if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' } -if (params.mode == "rosettafold_all_atom") { - include { PREPARE_ROSETTAFOLD_ALL_ATOM_DBS } from './subworkflows/local/prepare_rosettafold_all_atom_dbs' - include { ROSETTAFOLD_ALL_ATOM } from './workflows/rosettafold_all_atom' -} if (params.mode == "helixfold3") { include { PREPARE_HELIXFOLD3_DBS } from './subworkflows/local/prepare_helixfold3_dbs' include { HELIXFOLD3 } from './workflows/helixfold3' @@ -214,35 +210,6 @@ workflow 
NFCORE_PROTEINFOLD { ch_report_input = ch_report_input.mix(ESMFOLD.out.pdb_msa) } - // - // WORKFLOW: Run rosettafold_all_atom - // - if(params.mode == "rosettafold_all_atom") { - // - // SUBWORKFLOW: Prepare Rosettafold-all-atom DBs - // - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS ( - params.bfd_rosettafold_all_atom_path, - params.uniref30_rosettafold_all_atom_path, - params.pdb100_path - ) - ch_versions = ch_versions.mix(PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.versions) - - // - // WORKFLOW: Run nf-core/rosettafold_all_atom workflow - // - ROSETTAFOLD_ALL_ATOM ( - ch_samplesheet, - ch_versions, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.bfd.ifEmpty([]).first(), - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.uniref30, - PREPARE_ROSETTAFOLD_ALL_ATOM_DBS.out.pdb100 - ) - ch_multiqc = ROSETTAFOLD_ALL_ATOM.out.multiqc_report - ch_versions = ch_versions.mix(ROSETTAFOLD_ALL_ATOM.out.versions) - } - - // // WORKFLOW: Run helixfold3 // diff --git a/nextflow.config b/nextflow.config index 2ac61413..2a12b321 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ params { // Input options input = null - mode = 'alphafold2' // {alphafold2, colabfold, esmfold, rosettafold_all_atom, helixfold3} + mode = 'alphafold2' // {alphafold2, colabfold, esmfold, helixfold3} use_gpu = false split_fasta = false @@ -80,12 +80,6 @@ params { // Esmfold paths esmfold_params_path = null - // Rosettafold-all-atom parameters - rosettafold_all_atom_db = null - uniref30_rosettafold_all_atom_path = null - pdb100_path = null - bfd_rosettafold_all_atom_path = null - // Helixfold3 parameters helixfold3_db = null @@ -290,7 +284,6 @@ profiles { test_full_colabfold_multimer { includeConfig 'conf/test_full_colabfold_webserver_multimer.config' } test_full_esmfold { includeConfig 'conf/test_full_esmfold.config' } test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } - test_rosettafold_all_atom { includeConfig 'conf/test_rosettafold_all_atom.config' } test_helixfold3 { includeConfig 
'conf/test_helixfold3.config' } } @@ -411,9 +404,6 @@ if (params.mode.toLowerCase().split(",").contains("colabfold")) { if (params.mode.toLowerCase().split(",").contains("esmfold")) { includeConfig 'conf/modules_esmfold.config' } -if (params.mode.toLowerCase().split(",").contains("rosettafold_all_atom")) { - includeConfig 'conf/modules_rosettafold_all_atom.config' -} if (params.mode.toLowerCase().split(",").contains("helixfold3")) { includeConfig 'conf/modules_helixfold3.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 6bf1ffb1..1aa3d19a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -684,29 +684,6 @@ } ], "properties": { - "rosettafold_all_atom_db": { - "type": "string" - }, - "uniref30_rosettafold_all_atom_path": { - "type": "string", - "default": "null/uniref30/UniRef30_2020_06/*" - }, - "pdb100_path": { - "type": "string", - "default": "null/pdb100_2021Mar03/*" - }, - "bfd_rosettafold_all_atom_path": { - "type": "string", - "default": "null/bfd/*" - }, - "uniref30": { - "type": "string", - "default": "http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz" - }, - "pdb100": { - "type": "string", - "default": "https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz" - }, "helixfold3_init_models_link": { "type": "string", "default": "https://paddlehelix.bd.bcebos.com/HelixFold3/params/HelixFold3-params-240814.zip" @@ -740,7 +717,7 @@ }, "helixfold3_pdb_seqres_link": { "type": "string", - "default": "ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" + "default": "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" }, "helixfold3_uniref90_link": { "type": "string", @@ -808,7 +785,7 @@ }, "helixfold3_maxit_src_path": { "type": "string", - "default": "null/maxit-v11.200-prod-src/*" + "default": "null/maxit-v11.200-prod-src" } } } From 6a4f6ac1f9da9d69e7fd47d6aadd23c07448f195 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 Nov 2024 12:24:32 +1100 Subject: [PATCH 124/135] 
Working with multiqc enabled --- main.nf | 14 +++++++------- modules/local/run_helixfold3.nf | 5 ++--- workflows/helixfold3.nf | 25 ++++++++----------------- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/main.nf b/main.nf index 017aa3a0..7608de18 100644 --- a/main.nf +++ b/main.nf @@ -27,8 +27,8 @@ if (params.mode.toLowerCase().split(",").contains("esmfold")) { include { PREPARE_ESMFOLD_DBS } from './subworkflows/local/prepare_esmfold_dbs' include { ESMFOLD } from './workflows/esmfold' } -if (params.mode == "helixfold3") { - include { PREPARE_HELIXFOLD3_DBS } from './subworkflows/local/prepare_helixfold3_dbs' +if (params.mode.toLowerCase().split(",").contains("helixfold3")) { + include { PREPARE_HELIXFOLD3_DBS } from './subworkflows/local/prepare_helixfold3_dbs' include { HELIXFOLD3 } from './workflows/helixfold3' } @@ -213,7 +213,7 @@ workflow NFCORE_PROTEINFOLD { // // WORKFLOW: Run helixfold3 // - if(params.mode == "helixfold3") { + if(requested_modes.contains("helixfold3")) { // // SUBWORKFLOW: Prepare helixfold3 DBs // @@ -266,10 +266,10 @@ workflow NFCORE_PROTEINFOLD { PREPARE_HELIXFOLD3_DBS.out.helixfold3_init_models, PREPARE_HELIXFOLD3_DBS.out.helixfold3_maxit_src ) - ch_helixfold3_top_ranked_pdb = helixfold3.out.top_ranked_pdb - ch_multiqc = ch_multiqc.mix(helixfold3.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(helixfold3.out.versions) - ch_report_input = ch_report_input.mix(helixfold3.out.pdb_msa) + ch_helixfold3_top_ranked_pdb = HELIXFOLD3.out.top_ranked_pdb + ch_multiqc = ch_multiqc.mix(HELIXFOLD3.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(HELIXFOLD3.out.versions) + ch_report_input = ch_report_input.mix(HELIXFOLD3.out.pdb_msa) } // diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index f75629a0..8c941af6 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -30,9 +30,9 @@ process RUN_HELIXFOLD3 { output: path ("${fasta.baseName}*") - tuple 
val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif - tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: main_pdb + tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb tuple val(meta), path ("ranked*pdb") , emit: pdb + tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif tuple val(meta), path ("*_mqc.tsv") , emit: multiqc path "versions.yml", emit: versions @@ -74,7 +74,6 @@ process RUN_HELIXFOLD3 { --model_name allatom_demo \ --init_model "./init_models/HelixFold3-240814.pdparams" \ --infer_times 4 \ - --diff_batch_size 1 \ --logging_level "ERROR" \ --precision "bf16" diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index c24fc622..1d18e7bd 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -65,7 +65,7 @@ workflow HELIXFOLD3 { ch_helixfold3_maxit_src ) - RUN_HELIXFOLD3_ALL_ATOM + RUN_HELIXFOLD3 .out .multiqc .map { it[1] } @@ -73,19 +73,9 @@ workflow HELIXFOLD3 { .map { [ [ "model": "helixfold3" ], it.flatten() ] } .set { ch_multiqc_report } - ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3_ALL_ATOM.out.pdb) - ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_HELIXFOLD3_ALL_ATOM.out.top_ranked_pdb) - ch_versions = ch_versions.mix(RUN_HELIXFOLD3_ALL_ATOM.out.versions) - - RUN_HELIXFOLD3_ALL_ATOM - .out - .pdb - .combine(ch_dummy_file) - .map { - it[0]["model"] = "helixfold3" - it - } - .set { ch_pdb_msa } + ch_pdb = ch_pdb.mix(RUN_HELIXFOLD3.out.pdb) + ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_HELIXFOLD3.out.top_ranked_pdb) + ch_versions = ch_versions.mix(RUN_HELIXFOLD3.out.versions) ch_top_ranked_pdb .map { [ it[0]["id"], it[0], it[1] ] } @@ -100,9 +90,10 @@ workflow HELIXFOLD3 { .set { ch_pdb_msa } emit: - top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, dummy_file ] + 
top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* From 1a64519eda5f41014b517eaba6117786116c59bf Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 Nov 2024 15:49:01 +1100 Subject: [PATCH 125/135] Passes linting and tests --- bin/generate_comparison_report.py | 2 +- bin/generate_report.py | 2 +- modules/local/run_helixfold3.nf | 28 ++++++++++++++-------------- workflows/helixfold3.nf | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/bin/generate_comparison_report.py b/bin/generate_comparison_report.py index bea765f9..165f2bdc 100755 --- a/bin/generate_comparison_report.py +++ b/bin/generate_comparison_report.py @@ -50,7 +50,7 @@ def generate_output(plddt_data, name, out_dir, generate_tsv, pdb): linecolor="black", gridcolor="WhiteSmoke", ), - legend=dict(y=0, x=1), + legend=dict(yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1), plot_bgcolor="white", width=600, height=600, diff --git a/bin/generate_report.py b/bin/generate_report.py index b6cfa390..93fad4a6 100755 --- a/bin/generate_report.py +++ b/bin/generate_report.py @@ -120,7 +120,7 @@ def generate_output_images(msa_path, plddt_data, name, out_dir, in_type, generat linecolor="black", gridcolor="WhiteSmoke", ), - legend=dict(yanchor="bottom", y=0, xanchor="right", x=1.3), + legend=dict(yanchor="bottom", y=0.02, xanchor="right", x=1, bordercolor="Black", borderwidth=1), plot_bgcolor="white", width=600, height=600, diff --git a/modules/local/run_helixfold3.nf b/modules/local/run_helixfold3.nf index 8c941af6..9a608a64 100644 --- a/modules/local/run_helixfold3.nf +++ b/modules/local/run_helixfold3.nf @@ -30,10 +30,10 @@ process RUN_HELIXFOLD3 { output: path ("${fasta.baseName}*") - tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb - tuple val(meta), path ("ranked*pdb") , 
emit: pdb - tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif - tuple val(meta), path ("*_mqc.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_helixfold3.pdb") , emit: top_ranked_pdb + tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc + tuple val(meta), path ("${meta.id}_helixfold3.cif") , emit: main_cif path "versions.yml", emit: versions when: @@ -80,16 +80,15 @@ process RUN_HELIXFOLD3 { cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.pdb ./"${meta.id}"_helixfold3.pdb cp "${fasta.baseName}"/"${fasta.baseName}"-rank1/predicted_structure.cif ./"${meta.id}"_helixfold3.cif cd "${fasta.baseName}" - awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank1/predicted_structure.pdb | uniq > ranked_1_plddt.tsv + awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank1/predicted_structure.pdb > ranked_1_plddt.tsv for i in 2 3 4 - do awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank\$i/predicted_structure.pdb | uniq | awk '{print \$2}' > ranked_"\$i"_plddt.tsv + do awk '{print \$6"\\t"\$11}' "${fasta.baseName}"-rank\$i/predicted_structure.pdb | awk '{print \$2}' > ranked_"\$i"_plddt.tsv done paste ranked_1_plddt.tsv ranked_2_plddt.tsv ranked_3_plddt.tsv ranked_4_plddt.tsv > plddt.tsv echo -e Positions"\\t"rank_1"\\t"rank_2"\\t"rank_3"\\t"rank_4 > header.tsv cat header.tsv plddt.tsv > ../"${meta.id}"_plddt_mqc.tsv - cp final_features.pkl ../ - for i in 2 3 4 - do cp ""${fasta.baseName}"-rank\$i/predicted_structure.pdb" ../ranked_\$i.pdb + for i in 1 2 3 4 + do cp ""${fasta.baseName}"-rank\$i/predicted_structure.pdb" ./ranked_\$i.pdb done cd .. 
@@ -103,11 +102,12 @@ process RUN_HELIXFOLD3 { """ touch ./"${meta.id}"_helixfold3.cif touch ./"${meta.id}"_helixfold3.pdb - touch ./"${meta.id}"_mqc.tsv - touch "ranked_1.pdb" - touch "ranked_2.pdb" - touch "ranked_3.pdb" - touch "ranked_4.pdb" + touch ./"${meta.id}"_plddt_mqc.tsv + mkdir "${fasta.baseName}" + touch "${fasta.baseName}/ranked_1.pdb" + touch "${fasta.baseName}/ranked_2.pdb" + touch "${fasta.baseName}/ranked_3.pdb" + touch "${fasta.baseName}/ranked_4.pdb" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/helixfold3.nf b/workflows/helixfold3.nf index 1d18e7bd..f19003a2 100644 --- a/workflows/helixfold3.nf +++ b/workflows/helixfold3.nf @@ -90,10 +90,10 @@ workflow HELIXFOLD3 { .set { ch_pdb_msa } emit: - pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, dummy_file ] - top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] - multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] + pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } /* From d64b9f1c9cdf84a4836f5db9c49b68006ad52bc0 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 Nov 2024 15:58:13 +1100 Subject: [PATCH 126/135] Aligned with nf-core dev --- assets/comparison_template.html | 54 +++++++++++++++++++-------------- assets/report_template.html | 20 ++++++------ docs/output.md | 12 ++++++++ docs/usage.md | 12 ++++++++ 4 files changed, 65 insertions(+), 33 deletions(-) diff --git a/assets/comparison_template.html b/assets/comparison_template.html index 44158b03..61a916dd 100644 --- a/assets/comparison_template.html +++ b/assets/comparison_template.html @@ -49,26 +49,24 @@ rgba(3, 30, 148, 1) 100% ); } - #lddt_container .modebar { 
- display: flex !important; - flex-direction: row !important; - } -