From 0d82991aa7be318c661cd476d634320b40355674 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:58:57 -0500 Subject: [PATCH 1/3] [wfpm v0.8.0] started a new version argo-data-submission-wf@1.0.4 from argo-data-submission-wf@1.0.3 which was released --- argo-data-submission-wf/main.nf | 2 +- argo-data-submission-wf/pkg.json | 4 ++-- argo-data-submission-wf/tests/checker.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/argo-data-submission-wf/main.nf b/argo-data-submission-wf/main.nf index 0532d70..ded3963 100755 --- a/argo-data-submission-wf/main.nf +++ b/argo-data-submission-wf/main.nf @@ -22,7 +22,7 @@ */ nextflow.enable.dsl = 2 -version = '1.0.3' +version = '1.0.4' // universal params go here, change default value as needed params.container = "" diff --git a/argo-data-submission-wf/pkg.json b/argo-data-submission-wf/pkg.json index 73abd42..bf84620 100644 --- a/argo-data-submission-wf/pkg.json +++ b/argo-data-submission-wf/pkg.json @@ -1,6 +1,6 @@ { "name": "argo-data-submission-wf", - "version": "1.0.3", + "version": "1.0.4", "description": "ARGO data submission workflow", "main": "main.nf", "deprecated": false, @@ -36,4 +36,4 @@ "license": "GNU Affero General Public License v3", "bugReport": "https://github.com/icgc-argo/argo-data-submission/issues", "homepage": "https://github.com/icgc-argo/argo-data-submission#readme" -} +} \ No newline at end of file diff --git a/argo-data-submission-wf/tests/checker.nf b/argo-data-submission-wf/tests/checker.nf index 4993a5e..adfda8b 100755 --- a/argo-data-submission-wf/tests/checker.nf +++ b/argo-data-submission-wf/tests/checker.nf @@ -26,7 +26,7 @@ */ nextflow.enable.dsl = 2 -version = '1.0.3' +version = '1.0.4' // universal params params.publish_dir = "" From ab451f4b02425f72d71b21f83c77f3305171603c Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Thu, 30 Nov 2023 12:27:03 -0500 Subject: [PATCH 2/3] add new 
config,dry-run and enforce profiles --- argo-data-submission-wf/main.nf | 160 ++++++++++++++++++++------------ nextflow.config | 24 ++--- 2 files changed, 111 insertions(+), 73 deletions(-) diff --git a/argo-data-submission-wf/main.nf b/argo-data-submission-wf/main.nf index ded3963..27673d6 100755 --- a/argo-data-submission-wf/main.nf +++ b/argo-data-submission-wf/main.nf @@ -44,10 +44,11 @@ params.download_mode="local" params.song_container = "ghcr.io/overture-stack/song-client" params.song_container_version = "5.0.2" params.score_container = "ghcr.io/overture-stack/score" -params.score_container_version = "5.9.0" +params.score_container_version = "5.10.0" params.score_mem = 20 params.score_cpus = 8 params.score_force = false +params.dry_run = false // sanityChecks params.song_url="" @@ -58,7 +59,7 @@ params.api_token="" // payloadJsonToTsvs params.data_directory="NO_FILE1" params.skip_duplicate_check=false - +params.skip_sanity_check=false // payloadGenSeqExperiment params.schema_url="" params.experiment_info_tsv="NO_FILE2" @@ -237,6 +238,19 @@ workflow ArgoDataSubmissionWf { clinical_url main: + if (!"${workflow.profile}".contains('docker') && !"${workflow.profile}".contains('singularity')){ + exit 1, "Error Missing profile. `-profile` must be specified with the engines :`docker` or `singularity`." + } + if (!"${workflow.profile}".contains('rdpc_qa') && !"${workflow.profile}".contains('rdpc_dev') && !"${workflow.profile}".contains('rdpc')){ + exit 1, "Error Missing profile. `-profile` must be specified with the rdpc environments : `rdpc_qa`,`rdpc_dev`, or `rdpc`." + } + + if (!params.api_token){ + if (!params.api_download_token || !params.api_upload_token) { + exit 1, "Error SONG parameters detected but missing token params. `--api_token` or `api_upload_token` and `api_download_token` must be supplied when uploading." 
+ } + } + if ( og_experiment_info_tsv.startsWith("NO_FILE") && \ og_read_group_info_tsv.startsWith("NO_FILE") && \ @@ -278,29 +292,41 @@ workflow ArgoDataSubmissionWf { file(data_directory) ) - sanityCheck( - payloadJsonToTsvs.out.experiment_tsv, - api_token, - song_url, - clinical_url, - params.skip_duplicate_check - ) - - experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv - read_group_info_tsv=payloadJsonToTsvs.out.read_group_tsv - file_info_tsv=payloadJsonToTsvs.out.file_tsv + if (params.skip_sanity_check){ + experiment_info_tsv=payloadJsonToTsvs.out.experiment_tsv /* process output is used directly, as it was when passed to sanityCheck; no file() wrap */ + read_group_info_tsv=payloadJsonToTsvs.out.read_group_tsv /* keep the TSVs generated by payloadJsonToTsvs, as before this change */ + file_info_tsv=payloadJsonToTsvs.out.file_tsv + } else { + sanityCheck( + payloadJsonToTsvs.out.experiment_tsv, + api_token, + song_url, + clinical_url, + params.skip_duplicate_check + ) + + experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv + read_group_info_tsv=payloadJsonToTsvs.out.read_group_tsv /* keep the TSVs generated by payloadJsonToTsvs, as before this change */ + file_info_tsv=payloadJsonToTsvs.out.file_tsv + } } else { - sanityCheck( - file(og_experiment_info_tsv), - api_token, - song_url, - clinical_url, - params.skip_duplicate_check - ) - - experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv - read_group_info_tsv=file(og_read_group_info_tsv) - file_info_tsv=file(og_file_info_tsv) + if (params.skip_sanity_check){ + experiment_info_tsv=file(og_experiment_info_tsv) + read_group_info_tsv=file(og_read_group_info_tsv) + file_info_tsv=file(og_file_info_tsv) + } else { + sanityCheck( + file(og_experiment_info_tsv), + api_token, + song_url, + clinical_url, + params.skip_duplicate_check + ) + + experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv + read_group_info_tsv=file(og_read_group_info_tsv) + file_info_tsv=file(og_file_info_tsv) + } } checkCramReference( @@ -362,20 +388,22 @@ workflow ArgoDataSubmissionWf { skipping_tests ) - uploadWf( - study_id, - valSeq.out.validated_payload, - sequence_files.collect(), - '' - ) - - submissionReceipt( + if (!params.dry_run){ + uploadWf( study_id, 
- uploadWf.out.analysis_id, - song_url, - params.skip_submission_check, - sequence_files.collect() - ) + valSeq.out.validated_payload, + sequence_files.collect(), + '' + ) + + submissionReceipt( + study_id, + uploadWf.out.analysis_id, + song_url, + params.skip_submission_check, + sequence_files.collect() + ) + } } else if (checkCramReference.out.check_status && !ref_genome_fa.startsWith("NO_FILE")){ // If reference genome is provided... @@ -403,35 +431,36 @@ workflow ArgoDataSubmissionWf { sequence_files.collect().concat(cram2bam.out.output_bam.collect()).collect(), skipping_tests ) - - uploadWf( - study_id, - valSeq.out.validated_payload, - not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect(), - '' - ) - - submissionReceipt( + if (!params.dry_run){ + uploadWf( study_id, - uploadWf.out.analysis_id, - song_url, - params.skip_submission_check, + valSeq.out.validated_payload, not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect(), - ) + '' + ) + + submissionReceipt( + study_id, + uploadWf.out.analysis_id, + song_url, + params.skip_submission_check, + not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect() + ) + } } - if (params.cleanup && params.download_mode!='local' && ref_genome_fa.startsWith("NO_FILE")) { + if (params.cleanup && params.download_mode!='local' && ref_genome_fa.startsWith("NO_FILE") && !params.dry_run) { // only cleanup the sequence files when they are not from local cleanup( sequence_files.collect(), submissionReceipt.out.receipt // wait until upload is done ) - } else if (params.cleanup && params.download_mode!='local' && !ref_genome_fa.startsWith("NO_FILE")){ + } else if (params.cleanup && params.download_mode!='local' && !ref_genome_fa.startsWith("NO_FILE") && !params.dry_run){ // only cleanup the sequence files and cram2bam output when they are not from local cleanup( sequence_files.collect().concat(cram2bam.out.output_bam.collect()).collect(), submissionReceipt.out.receipt // wait 
until upload is done ) - } else if (params.cleanup && params.download_mode=='local' && !ref_genome_fa.startsWith("NO_FILE")){ + } else if (params.cleanup && params.download_mode=='local' && !ref_genome_fa.startsWith("NO_FILE") && !params.dry_run){ // only cleanup output from cram2bam on local cleanup( cram2bam.out.output_bam.collect(), @@ -439,16 +468,25 @@ workflow ArgoDataSubmissionWf { ) } - printOut( - pGenExp.out.payload, - uploadWf.out.analysis_id, - submissionReceipt.out.receipt - ) + if (!params.dry_run){ + printOut( + pGenExp.out.payload, + uploadWf.out.analysis_id, + submissionReceipt.out.receipt + ) + out_payload = pGenExp.out.payload + out_analysis_id = uploadWf.out.analysis_id + out_receipt = submissionReceipt.out.receipt + } else { + out_payload = null + out_analysis_id = null + out_receipt = null + } emit: - json_file=pGenExp.out.payload - output_analysis_id=uploadWf.out.analysis_id - receipt=submissionReceipt.out.receipt + out_json_file=out_payload + out_output_analysis_id=out_analysis_id + out_receipt=out_receipt } // this provides an entry point for this main script, so it can be run directly without clone the repo diff --git a/nextflow.config b/nextflow.config index bcf5649..9f15b9f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -56,11 +56,11 @@ process { } profiles { - collab { - params.song_url = 'https://submission-song.rdpc.cancercollaboratory.org' - params.score_url = 'https://submission-score.rdpc.cancercollaboratory.org' + rdpc { + params.song_url = 'https://submission-song.rdpc.argo.genomeinformatics.org' + params.score_url = 'https://submission-score.rdpc.argo.genomeinformatics.org' params.clinical_url = 'https://clinical.platform.icgc-argo.org' - params.schema_url = 'https://submission-song.rdpc.cancercollaboratory.org/schemas/sequencing_experiment' + params.schema_url = 'https://submission-song.rdpc.argo.genomeinformatics.org/schemas/sequencing_experiment' } docker { docker.enabled = true @@ -73,16 +73,16 @@ profiles { 
singularity.autoMounts = true singularity.engineOptions = '-s' } - debug_qa { - params.song_url = 'https://submission-song.rdpc-qa.cancercollaboratory.org' - params.score_url = 'https://submission-score.rdpc-qa.cancercollaboratory.org' + rdpc_qa { + params.song_url = 'https://submission-song.rdpc-qa.cumulus.genomeinformatics.org' + params.score_url = 'https://submission-score.rdpc-qa.cumulus.genomeinformatics.org' params.clinical_url = 'https://clinical.qa.argo.cancercollaboratory.org' - params.schema_url = 'https://submission-song.rdpc-qa.cancercollaboratory.org/schemas/sequencing_experiment' + params.schema_url = 'https://submission-song.rdpc-qa.cumulus.genomeinformatics.org/schemas/sequencing_experiment' } - debug_dev { - params.song_url = 'https://submission-song.rdpc-dev.cancercollaboratory.org' - params.score_url = 'https://submission-score.rdpc-dev.cancercollaboratory.org' + rdpc_dev { + params.song_url = 'https://submission-song.rdpc-dev.cumulus.genomeinformatics.org' + params.score_url = 'https://submission-score.rdpc-dev.cumulus.genomeinformatics.org' params.clinical_url = 'https://clinical.dev.argo.cancercollaboratory.org' - params.schema_url = 'https://submission-song.rdpc-dev.cancercollaboratory.org/schemas/sequencing_experiment' + params.schema_url = 'https://submission-song.rdpc-dev.cumulus.genomeinformatics.org/schemas/sequencing_experiment' } } From ea73002ae74546f2f8373b01fed19e21252f063c Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Thu, 30 Nov 2023 12:49:06 -0500 Subject: [PATCH 3/3] add test files for prod --- .../tests/input/prod_experiment-fq.v2.tsv | 2 ++ argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv | 7 +++++++ .../tests/input/prod_read_group-fq.v2.tsv | 4 ++++ 3 files changed, 13 insertions(+) create mode 100644 argo-data-submission-wf/tests/input/prod_experiment-fq.v2.tsv create mode 100644 argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv create mode 100644 
argo-data-submission-wf/tests/input/prod_read_group-fq.v2.tsv diff --git a/argo-data-submission-wf/tests/input/prod_experiment-fq.v2.tsv b/argo-data-submission-wf/tests/input/prod_experiment-fq.v2.tsv new file mode 100644 index 0000000..f9828c9 --- /dev/null +++ b/argo-data-submission-wf/tests/input/prod_experiment-fq.v2.tsv @@ -0,0 +1,2 @@ +type submitter_sequencing_experiment_id program_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count +sequencing_experiment SubWf_exp_02_FASTQ_input TEST-CA TEST_SUBMITTER_DONOR_ID_zzespofnsa TEST_SUBMITTER_SPECIMEN_ID_zzespofnsaT1 TEST_SUBMITTER_SAMPLE_ID_zzespofnsaT1 TEST_SUBMITTER_SAMPLE_ID_zzespofnsaN1 EXT ILLUMINA HiSeq 2000 WGS 2014-12-12 3 diff --git a/argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv b/argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv new file mode 100644 index 0000000..87a8720 --- /dev/null +++ b/argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv @@ -0,0 +1,7 @@ +type name format size md5sum path +file C0HVY.2_r1.fq.gz FASTQ 2040 c4971f805930e9e31e1c45314c652d3c input/C0HVY.2_r1.fq.gz +file C0HVY.2_r2.fq.gz FASTQ 2042 f60026e682bf55f014c84d494112aa13 input/C0HVY.2_r2.fq.gz +file D0RE2.1_r1.fq.gz FASTQ 2309 835b35f6e7f8263636fce4224b1aca69 input/D0RE2.1_r1.fq.gz +file D0RE2.1_r2.fq.gz FASTQ 2365 23d030ddba2916f8430b13234e3e9a0c input/D0RE2.1_r2.fq.gz +file D0RH0.2_r1.fq.gz FASTQ 1872 b3aa75d0585a989bb25fae252a10b532 input/D0RH0.2_r1.fq.gz +file D0RH0.2_r2.fq.gz FASTQ 1894 c59ebfa4199287bf3911a4c1d6eda71e input/D0RH0.2_r2.fq.gz \ No newline at end of file diff --git a/argo-data-submission-wf/tests/input/prod_read_group-fq.v2.tsv b/argo-data-submission-wf/tests/input/prod_read_group-fq.v2.tsv new file mode 100644 index 0000000..c31bff6 --- /dev/null +++ b/argo-data-submission-wf/tests/input/prod_read_group-fq.v2.tsv @@ -0,0 +1,4 @@ +type 
submitter_read_group_id read_group_id_in_bam submitter_sequencing_experiment_id platform_unit is_paired_end file_r1 file_r2 read_length_r1 read_length_r2 insert_size sample_barcode library_name +read_group C0HVY.2 SubWf_exp_02_FASTQ_input 74_8a true C0HVY.2_r1.fq.gz C0HVY.2_r2.fq.gz 150 150 298 Pond-147580 +read_group D0RE2.1 SubWf_exp_02_FASTQ_input 74_8b true D0RE2.1_r1.fq.gz D0RE2.1_r2.fq.gz 150 150 298 Pond-147580 +read_group D0RH0.2 SubWf_exp_02_FASTQ_input 74_8c true D0RH0.2_r1.fq.gz D0RH0.2_r2.fq.gz 150 150 298 Pond-147580