From 49eb54c7bd38483f26ae5b2f7a674b8a6ab0f22f Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 11:42:41 -0500 Subject: [PATCH 01/13] fix #191 --- subworkflows/local/input_check.nf | 6 ------ 1 file changed, 6 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 36602633..70c8b4fc 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -33,12 +33,6 @@ def create_fastq_channel(LinkedHashMap row, String seq_center) { meta.antibody = row.antibody meta.control = row.control - def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" - if (seq_center) { - read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" - } - meta.read_group = read_group - // add path(s) of the fastq file(s) to the meta map def fastq_meta = [] if (!file(row.fastq_1).exists()) { From 39fe6afaf4777c35c7bd903839000d19a862e846 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 12:12:37 -0500 Subject: [PATCH 02/13] fix: #186 --- modules/local/chipseeker/annotate/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/chipseeker/annotate/main.nf b/modules/local/chipseeker/annotate/main.nf index 756304e7..2091ccc5 100644 --- a/modules/local/chipseeker/annotate/main.nf +++ b/modules/local/chipseeker/annotate/main.nf @@ -1,5 +1,5 @@ process CHIPSEEKER_ANNOTATE { - tag "${meta.id}.${meta.group}" + tag "${meta.id}" label 'peaks' label 'process_high' @@ -19,7 +19,7 @@ process CHIPSEEKER_ANNOTATE { """ chipseeker_annotate.R \\ --peak ${bed} \\ - --outfile-prefix ${meta.id}.${meta.group} \\ + --outfile-prefix ${meta.id} \\ --genome-txdb ${txdb} \\ --genome-annot ${annot_db} \\ --uptss 2000 \\ @@ -32,7 +32,7 @@ process CHIPSEEKER_ANNOTATE { """ for ftype in annotated.txt summary.txt genelist.txt annotation.Rds .png do - touch ${meta.id}.${meta.group}.\${ftype} + touch ${meta.id}.\${ftype} done """ } From 4c5e96e5190e70c4141612205cba8be136463cac Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 12:21:54 -0500 Subject: [PATCH 03/13] fix: iss#185 --- bin/chipseeker_annotate.R | 11 ++++++----- modules/local/chipseeker/annotate/main.nf | 8 ++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bin/chipseeker_annotate.R b/bin/chipseeker_annotate.R index 142e84d0..50ec1451 100755 --- a/bin/chipseeker_annotate.R +++ b/bin/chipseeker_annotate.R @@ -26,6 +26,7 @@ load_package(args$txdb) txdb <- args$txdb %>% rlang::sym() %>% eval() +dir.create(file.path(outfile_prefix), showWarnings = FALSE) # parse peak file np <- read.table(args$peak, sep = "\t") @@ -71,7 +72,7 @@ annot <- annotatePeak( ignoreDownstream = FALSE, overlap = "TSS" ) -saveRDS(annot, file = glue("{outfile_prefix}.annotation.Rds")) +saveRDS(annot, file = glue("{outfile_prefix}/{outfile_prefix}.annotation.Rds")) padf <- as.data.frame(annot) padf$peakID <- paste(padf$seqnames, ":", padf$start, "-", padf$end, sep = "") @@ -103,7 +104,7 @@ merged <- dplyr::full_join(padf, np, by = "peakID") %>% dplyr::rename("#peakID" = "peakID") %>% dplyr::arrange(dplyr::desc(qValue)) -annotated_outfile <- glue("{outfile_prefix}.annotated.txt") +annotated_outfile <- glue("{outfile_prefix}/{outfile_prefix}.annotated.txt") write.table(merged, annotated_outfile, sep = "\t", quote = FALSE, row.names = FALSE) l <- paste("# Median peak width : ", median(merged$width), sep = "") write(l, annotated_outfile, append = TRUE) @@ -114,7 +115,7 @@ write(l, annotated_outfile, append = TRUE) # get promoter genes -outfile_genelist <- glue("{outfile_prefix}.genelist.txt") +outfile_genelist <- glue("{outfile_prefix}/{outfile_prefix}.genelist.txt") # ... all lines with annotation starting with "Promoter" promoters1 <- dplyr::filter(merged, grepl("Promoter", annotation)) # ... all lines with annotation is "5' UTR" @@ -138,7 +139,7 @@ write(l, outfile_genelist, append = TRUE) # annotation type frequency table l <- paste("#annotationType", "frequency", "medianWidth", "medianpValue", "medianqValue", sep = "\t") -outfile_summary <- glue("{outfile_prefix}.summary.txt") +outfile_summary <- glue("{outfile_prefix}/{outfile_prefix}.summary.txt") write(l, outfile_summary) atypes <- c( "3' UTR", @@ -169,7 +170,7 @@ for (ann in c("Exon", "Intron")) { # upset plot ggsave( - filename = glue("{outfile_prefix}_upsetplot.png"), + filename = glue("{outfile_prefix}/{outfile_prefix}_upsetplot.png"), plot = upsetplot(annot, vennpie = TRUE), device = "png" ) diff --git a/modules/local/chipseeker/annotate/main.nf b/modules/local/chipseeker/annotate/main.nf index 2091ccc5..82788116 100644 --- a/modules/local/chipseeker/annotate/main.nf +++ b/modules/local/chipseeker/annotate/main.nf @@ -11,9 +11,9 @@ process CHIPSEEKER_ANNOTATE { val(annot_db) output: - tuple val(meta), path("*.annotated.txt"), path("*.summary.txt"), path("*.genelist.txt"), emit: txt - path("*.annotation.Rds"), emit: annot - path("*.png"), emit: plots + tuple val(meta), path("${meta.id}/*.annotated.txt"), path("${meta.id}/*.summary.txt"), path("${meta.id}/*.genelist.txt"), emit: txt + path("${meta.id}/*.annotation.Rds"), emit: annot + path("${meta.id}/*.png"), emit: plots script: """ @@ -32,7 +32,7 @@ process CHIPSEEKER_ANNOTATE { """ for ftype in annotated.txt summary.txt genelist.txt annotation.Rds .png do - touch ${meta.id}.\${ftype} + touch ${meta.id}/${meta.id}.\${ftype} done """ } From 1193c40f0ec4e1dbdfe1f2e82c1ca3bb6b708b9e Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 12:25:46 -0500 Subject: [PATCH 04/13] fix: increase cpu allocation iss#183 --- modules/local/chipseeker/peakplot/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/chipseeker/peakplot/main.nf b/modules/local/chipseeker/peakplot/main.nf index 9c3eaa4e..eada5474 100644 --- a/modules/local/chipseeker/peakplot/main.nf +++ b/modules/local/chipseeker/peakplot/main.nf @@ -1,7 +1,7 @@ process CHIPSEEKER_PEAKPLOT { tag "${meta.id}" label 'peaks' - label 'process_medium' + label 'process_high' container 'nciccbr/ccbr_chipseeker:1.1.2' From d1ce47a85ba6198c70fd7877c536980f678018fa Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 12:59:26 -0500 Subject: [PATCH 05/13] feat: check contrast manifest during inputcheck fix iss#181 --- main.nf | 6 ++---- subworkflows/local/differential/main.nf | 16 +--------------- subworkflows/local/input_check.nf | 20 ++++++++++++++++++-- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/main.nf b/main.nf index 1549c54c..8851dfab 100644 --- a/main.nf +++ b/main.nf @@ -64,7 +64,7 @@ workflow { } workflow CHIPSEQ { - INPUT_CHECK(file(params.input, checkIfExists: true), params.seq_center) + INPUT_CHECK(file(params.input, checkIfExists: true), params.seq_center, file(params.contrasts, checkIfExists: true)) INPUT_CHECK.out.reads.set { raw_fastqs } raw_fastqs | CUTADAPT @@ -129,7 +129,6 @@ workflow CHIPSEQ { } .set{ ch_consensus_peaks } if (params.contrasts) { - contrasts = file(params.contrasts, checkIfExists: true) // TODO use consensus peaks for regions of interest in diffbind CALL_PEAKS.out.bam_peaks .combine(deduped_bam) @@ -145,8 +144,7 @@ workflow CHIPSEQ { .set{ tagalign_peaks } DIFF( bam_peaks, tagalign_peaks, - INPUT_CHECK.out.csv, - contrasts + INPUT_CHECK.out.contrasts ) } diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf index 4115e08a..3c0a1a60 100644 --- a/subworkflows/local/differential/main.nf +++ b/subworkflows/local/differential/main.nf @@ -1,4 +1,3 @@ -include { CHECK_CONTRASTS } from "../../../modules/local/check_contrasts/" include { DIFFBIND } from "../../../subworkflows/local/diffbind/" include { MANORM } from "../../../subworkflows/local/manorm/" @@ -6,22 +5,9 @@ workflow DIFF { take: bam_peaks tagalign_peaks - samplesheet_file - contrasts_file + contrasts main: - - CHECK_CONTRASTS(samplesheet_file, contrasts_file) - .csv - .flatten() - .splitCsv( header: true, sep: ',' ) - .map{ it -> - meta = get_contrast_meta(it) - [ sample_basename: meta.sample_basename, group: meta.group, contrast: meta.contrast ] - } - .unique() - .set{ contrasts } - bam_peaks .map{ meta, bam, bai, peak, ctrl_bam, ctrl_bai -> [meta.sample_basename, meta.rep] } .groupTuple() diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 70c8b4fc..d9dd6f72 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -4,11 +4,13 @@ // include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check.nf' +include { CHECK_CONTRASTS } from "../../modules/local/check_contrasts/" workflow INPUT_CHECK { take: samplesheet // file: /path/to/samplesheet.csv seq_center // string: sequencing center for read group + contrastsheet // file: /path/to/contrast.yaml main: valid_csv = SAMPLESHEET_CHECK( samplesheet ).csv @@ -17,10 +19,24 @@ workflow INPUT_CHECK { .map { create_fastq_channel(it, seq_center) } .set { reads } + if (params.contrasts) { + CHECK_CONTRASTS(samplesheet, contrastsheet) + .csv + .flatten() + .splitCsv( header: true, sep: ',' ) + .map{ it -> + meta = get_contrast_meta(it) + [ sample_basename: meta.sample_basename, group: meta.group, contrast: meta.contrast ] + } + .unique() + .set{ contrasts } + } + emit: reads // channel: [ val(meta), [ reads ] ] - csv = valid_csv - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] + csv = valid_csv + contrasts = contrasts + versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] From 123057b9354c41db19994ea74a4ef3969899ba20 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 13:38:15 -0500 Subject: [PATCH 06/13] chore: fix errors in def location, add comments to list --- subworkflows/local/differential/main.nf | 16 +--------------- subworkflows/local/input_check.nf | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf index 3c0a1a60..7e386e9f 100644 --- a/subworkflows/local/differential/main.nf +++ b/subworkflows/local/differential/main.nf @@ -57,18 +57,4 @@ workflow DIFF { diff_peaks = bam_peaks // TODO -} - -def get_contrast_meta(LinkedHashMap row) { - def meta = [:] - meta.id = row.sample - meta.sample_basename = row.sample_basename - meta.rep = row.rep - meta.single_end = row.single_end.toBoolean() - meta.antibody = row.antibody - meta.control = row.control - meta.group = row.group - meta.contrast = row.contrast - - return meta -} +} \ No newline at end of file diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index d9dd6f72..de6f5f81 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -64,3 +64,18 @@ def create_fastq_channel(LinkedHashMap row, String seq_center) { } return fastq_meta } + +// Function to get contrast list of [meta, [id, sample_basename, rep, single_end, antibody, control, group, contrast]] +def get_contrast_meta(LinkedHashMap row) { + def meta = [:] + meta.id = row.sample + meta.sample_basename = row.sample_basename + meta.rep = row.rep + meta.single_end = row.single_end.toBoolean() + meta.antibody = row.antibody + meta.control = row.control + meta.group = row.group + meta.contrast = row.contrast + + return meta +} From a21c8af237302610f948695a0528bdfecc65b41a Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 13:38:58 -0500 Subject: [PATCH 07/13] docs: adding start of manifest docs iss#174 --- docs/manifests.md | 30 ++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 31 insertions(+) create mode 100644 docs/manifests.md diff --git a/docs/manifests.md b/docs/manifests.md new file mode 100644 index 00000000..a5745cab --- /dev/null +++ b/docs/manifests.md @@ -0,0 +1,30 @@ +# Jotting notes here +## Samplemanifest +The following columns are required: + +- sample: sampleID; does not need to be a unique column +- rep: replicateID of sampleID; does not need to be a unique column +- fastq_1: absolute path to R1 of sampleID +- fastq_2: absolute path to R1 of sampleID +- antibody: -c sampleID for mac2; this must match a unique {sample}_{rep} format +- control: + +Example antibody / control format for a single-end project: + +``` +sample,rep,fastq_1,fastq_2,antibody,control +sample,1,/path/to/sample_1.R1.fastq.gz,,input_1,input_1 +sample,2,/path/to/sample_2.R1.fastq.gz,,input_1,input_1 +input,1,/path/to/sample1.R1.fastq.gz,,, +input,2,/path/to/sample1.R1.fastq.gz,,, +``` + +Example antibody / control format for a paired-end project: + +``` +sample,rep,fastq_1,fastq_2,antibody,control +sample,1,/path/to/sample_1.R1.fastq.gz,/path/to/sample_1.R2.fastq.gz,input_1,input_1 +sample,2,/path/to/sample_2.R1.fastq.gz,/path/to/sample_1.R2.fastq.gz,input_1,input_1 +input,1,/path/to/input_1.R1.fastq.gz,/path/to/input_1.R2.fastq.gz,, +input,2,/path/to/input_2.R1.fastq.gz,/path/to/input_2.R2.fastq.gz,, +``` \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index cd9f0885..d8f31d3d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -96,6 +96,7 @@ nav: - Home: index.md - Usage: - Getting Started: getting-started.md + - Creating Manifests: manifests.md - Developer Guide: - How to contribute: contributing.md - Release guide: release-guide.md From 40e067cb26cc7bd00eb0800387fcdbbec766bc21 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Mon, 26 Feb 2024 14:11:41 -0500 Subject: [PATCH 08/13] chore: correct input variables for contrast --- subworkflows/local/input_check.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index de6f5f81..47a1b445 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,7 +20,7 @@ workflow INPUT_CHECK { .set { reads } if (params.contrasts) { - CHECK_CONTRASTS(samplesheet, contrastsheet) + CHECK_CONTRASTS(valid_csv, contrastsheet) .csv .flatten() .splitCsv( header: true, sep: ',' ) From ff770fdb2fae99bbccd9b4c9c428d7d08e1618ba Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 1 Mar 2024 11:50:15 -0500 Subject: [PATCH 09/13] docs: add workflow info --- docs/workflow.md | 131 +++++++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 132 insertions(+) create mode 100644 docs/workflow.md diff --git a/docs/workflow.md b/docs/workflow.md new file mode 100644 index 00000000..cfe3f1ed --- /dev/null +++ b/docs/workflow.md @@ -0,0 +1,131 @@ +## Process workflow + +Will need to add images to show workflow + +### Pipeline Checks +- Input files are checked that they meet standard formatting; some file access is reviewed + +- Processes include: + + - INPUT_CHECK:SAMPLESHEET_CHECK + - INPUT_CHECK:CHECK_CONTRASTS + +- Output directories include: + + - check_contrasts + +## Pre-alignment +- Adaptors are trimmed, if blacklists are included, filtering occurs + +- Processes include: + + - CUTADAPT + - FILTER_BLACKLIST:BWA_MEM + - FILTER_BLACKLIST:SAMTOOLS_FILTERALIGNED + - FILTER_BLACKLIST:PICARD_SAMTOFASTQ + - FILTER_BLACKLIST:CUSTOM_COUNTFASTQ + +- Output directories include: + + - cutadapt + +## Alignment +- Samples are aligned using BWA; alignment stats are generated; samples are sorted and filtered + +- Processes include: + + - ALIGN_GENOME:BWA_MEM + - ALIGN_GENOME:SAMTOOLS_FLAGSTAT_ALIGN + - ALIGN_GENOME:FILTER_QUALITY + - ALIGN_GENOME:SAMTOOLS_SORT + - ALIGN_GENOME:SAMTOOLS_FLAGSTAT_FILTER + +- Output directories include: + + - bwa_mem + - samtools_flagstat_align + - samtools_filteraligned + - samtools_sort + - samtools_flagstat_filter + +## Deduplicate +- Processes include: + + - DEDUPLICATE:MACS2_DEDUP + - DEDUPLICATE:INDEX_SINGLE + - DEDUPLICATE:PICARD_DEDUP + - DEDUPLICATE:INDEX_PAIRED + +- Output directories include: + +## Quality Control +- Processes include: + + - PPQT_PROCESS + - QC:FASTQC_RAW + - QC:FASTQC_TRIMMED + - QC:FASTQ_SCREE + - QC:PRESEQ + - QC:HANDLE_PRESEQ_ERROR + - QC:PARSE_PRESEQ_LOG + - QC:QC_STATS + - QC:QC_TABLE + +## Deeptools analysis +- Processes include: + + - QC:DEEPTOOLS:BAM_COVERAGE + - QC:DEEPTOOLS:BIGWIG_SUM + - QC:DEEPTOOLS:PLOT_CORRELATION + - QC:DEEPTOOLS:PLOT_PCA + - QC:DEEPTOOLS:NORMALIZE_INPUT + - QC:DEEPTOOLS:BED_PROTEIN_CODING + - QC:DEEPTOOLS:COMPUTE_MATRIX + - QC:DEEPTOOLS:PLOT_HEATMAP + - QC:DEEPTOOLS:PLOT_PROFILE + - QC:DEEPTOOLS:PLOT_FINGERPRINT + +## Peak calling +- Processes include: + + - PHANTOM_PEAKS + - CALL_PEAKS:CALC_GENOME_FRAC + - CALL_PEAKS:BAM_TO_BED + - CALL_PEAKS:MACS_BROAD + - CALL_PEAKS:MACS_NARROW + - CALL_PEAKS:SICER + - CALL_PEAKS:CONVERT_SICER + - CALL_PEAKS:GEM + - CALL_PEAKS:FILTER_GEM + - CALL_PEAKS:FRACTION_IN_PEAKS + - CALL_PEAKS:CONCAT_FRIPS + - CALL_PEAKS:PLOT_FRIP + - CALL_PEAKS:GET_PEAK_META + - CALL_PEAKS:CONCAT_PEAK_META + - CALL_PEAKS:PLOT_PEAK_WIDTHS + +## Consensus Peaks +- Processes include: + + - CONSENSUS_PEAKS:CAT_CAT + - CONSENSUS_PEAKS:SORT_BED + - CONSENSUS_PEAKS:BEDTOOLS_MERGE + - CONSENSUS_PEAKS:- CONSENSUS_PEAKS:_OUT + +## Annotate +- Processes include: + + - ANNOTATE:CHIPSEEKER_PEAKPLOT + - ANNOTATE:CHIPSEEKER_ANNOTATE + - ANNOTATE:CHIPSEEKER_PLOTLIST + - ANNOTATE:HOMER_MOTIFS + - ANNOTATE:MEME_AME + +## Differential Analysis +- If there are more than 2 replicates per group then `diffbind` is performed; otherwise `manorm` pairewise analysis is performed + +- Processes include: + + - DIFF:DIFFBIND:PREP_DIFFBIND + - DIFF:DIFFBIND:DIFFBIND_RMD + - DIFF:MANORM:MANORM_PAIRWISE \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index d8f31d3d..921a8b8d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -97,6 +97,7 @@ nav: - Usage: - Getting Started: getting-started.md - Creating Manifests: manifests.md + - Workflow Overview: workflow.md - Developer Guide: - How to contribute: contributing.md - Release guide: release-guide.md From 0d8dc3611a2b4d7261f2124afd69b4d8b9065e5a Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 1 Mar 2024 11:53:16 -0500 Subject: [PATCH 10/13] fix: increase memory #195 --- modules/local/deeptools.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/deeptools.nf b/modules/local/deeptools.nf index 625b9118..5835cc7a 100644 --- a/modules/local/deeptools.nf +++ b/modules/local/deeptools.nf @@ -70,7 +70,7 @@ process NORMALIZE_INPUT { process BIGWIG_SUM { label 'qc' label 'deeptools' - label 'process_high' + label 'process_high_memory' container "${params.containers.deeptools}" @@ -307,7 +307,7 @@ process PLOT_HEATMAP { process PLOT_PROFILE { label 'qc' label 'deeptools' - label 'process_single' + label 'process_low' container "${params.containers.deeptools}" From 105dd26739f5ce6de477a1b0a63f591fa406c8a3 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Fri, 1 Mar 2024 12:51:22 -0500 Subject: [PATCH 11/13] fix: separates contrast and contrastsheet params #196 --- conf/full_mm10.config | 3 ++- conf/test_human.config | 2 +- conf/test_mm10.config | 3 ++- main.nf | 5 ++++- nextflow.config | 5 ++++- subworkflows/local/input_check.nf | 4 +++- 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/conf/full_mm10.config b/conf/full_mm10.config index 5d5c7de9..93e6c055 100644 --- a/conf/full_mm10.config +++ b/conf/full_mm10.config @@ -5,7 +5,8 @@ params { genome = 'mm10' outdir = "results/full_mm10" input = "${projectDir}/assets/samplesheet_full_mm10.csv" - contrasts = "${projectDir}/assets/contrasts_full_mm10.yml" + contrasts = 'true' + contrastsheet = "${projectDir}/assets/contrasts_full_mm10.yml" sicer { species = "mm10" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py } diff --git a/conf/test_human.config b/conf/test_human.config index 2af4f0a8..0ea1f6bb 100644 --- a/conf/test_human.config +++ b/conf/test_human.config @@ -5,7 +5,7 @@ params { genome = 'hg38' outdir = "results/human" input = "assets/samplesheet_human.csv" - contrasts = null //'assets/contrasts_human.yml' + contrasts = false //'assets/contrasts_human.yml' //read_length = 50 sicer.species = "${params.genome}" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py diff --git a/conf/test_mm10.config b/conf/test_mm10.config index 19f66705..421ff225 100644 --- a/conf/test_mm10.config +++ b/conf/test_mm10.config @@ -5,7 +5,8 @@ params { genome = 'mm10' outdir = "results/test_mm10" input = "${projectDir}/assets/samplesheet_test_mm10.csv" - contrasts = "${projectDir}/assets/contrasts_test_mm10.yml" + contrasts = 'true' + contrastsheet = "${projectDir}/assets/contrasts_test_mm10.yml" read_length = 50 sicer.species = "mm10" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py diff --git a/main.nf b/main.nf index 8851dfab..5c7b6b4d 100644 --- a/main.nf +++ b/main.nf @@ -36,6 +36,9 @@ include { PHANTOM_PEAKS PPQT_PROCESS MULTIQC } from "./modules/local/qc.nf" + +contrastsheet = params.contrastsheet ?: "/assets/contrast_test.ymls" + workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { def message = Utils.spooker(workflow) @@ -64,7 +67,7 @@ workflow { } workflow CHIPSEQ { - INPUT_CHECK(file(params.input, checkIfExists: true), params.seq_center, file(params.contrasts, checkIfExists: true)) + INPUT_CHECK(file(params.input, checkIfExists: true), params.seq_center, file(contrastsheet)) INPUT_CHECK.out.reads.set { raw_fastqs } raw_fastqs | CUTADAPT diff --git a/nextflow.config b/nextflow.config index 8b66688b..6c5829d4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -2,11 +2,14 @@ nextflow.enable.dsl = 2 params { input = null - contrasts = null + contrasts = false seq_center = null read_length = null genome = null + // manifest for contrasts + contrastsheet = null + // custom genome options genome_fasta = null genes_gtf = null diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 47a1b445..2d5fdb80 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -19,6 +19,8 @@ workflow INPUT_CHECK { .map { create_fastq_channel(it, seq_center) } .set { reads } + // Run check on the contrast manifest + contrasts=Channel.empty() if (params.contrasts) { CHECK_CONTRASTS(valid_csv, contrastsheet) .csv @@ -33,7 +35,7 @@ workflow INPUT_CHECK { } emit: - reads // channel: [ val(meta), [ reads ] ] + reads = reads // channel: [ val(meta), [ reads ] ] csv = valid_csv contrasts = contrasts versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] From 7fa12f2e978daa64f8de79c01d2a809ab6942b9e Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 24 Apr 2024 10:56:33 -0400 Subject: [PATCH 12/13] docs: add todo to docs/workflow.md --- docs/workflow.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workflow.md b/docs/workflow.md index cfe3f1ed..be2a72d8 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -1,6 +1,6 @@ ## Process workflow -Will need to add images to show workflow +TODO add images to show workflow ### Pipeline Checks - Input files are checked that they meet standard formatting; some file access is reviewed From 3846a5e12b7c5e904e21e45c11bfab69c0dedd44 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 24 Apr 2024 10:56:47 -0400 Subject: [PATCH 13/13] docs: add TODO to docs/manifests.md --- docs/manifests.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/manifests.md b/docs/manifests.md index a5745cab..485ebe19 100644 --- a/docs/manifests.md +++ b/docs/manifests.md @@ -1,4 +1,4 @@ -# Jotting notes here +# TODO Jotting notes here ## Samplemanifest The following columns are required: