From f125fb930bacc9f7b23c35a83f65ed39725e5cc1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 11:58:14 -0400 Subject: [PATCH 01/26] fix: don't jumble sample IDs in QC stats; add read counts b/e each step --- assets/multiqc_config.yaml | 12 +++-- bin/createtable.py | 6 +-- bin/filterMetrics.py | 2 + main.nf | 4 +- modules/CCBR/custom/countfastq/main.nf | 25 ++++++++++ modules/CCBR/custom/countfastq/meta.yml | 39 ++++++++++++++++ modules/CCBR/picard/samtofastq/main.nf | 5 +- .../samtools/flagstat/main.nf | 2 +- .../samtools/flagstat/meta.yml | 0 modules/local/qc.nf | 27 +++++++---- modules/local/samtools_index.nf | 4 +- subworkflows/CCBR/filter_blacklist/main.nf | 9 ++-- subworkflows/CCBR/filter_blacklist/meta.yml | 4 ++ subworkflows/local/align.nf | 17 ++++--- subworkflows/local/qc.nf | 46 ++++++++++++------- 15 files changed, 152 insertions(+), 50 deletions(-) create mode 100644 modules/CCBR/custom/countfastq/main.nf create mode 100644 modules/CCBR/custom/countfastq/meta.yml rename modules/{nf-core => CCBR}/samtools/flagstat/main.nf (95%) rename modules/{nf-core => CCBR}/samtools/flagstat/meta.yml (100%) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 163ca5b4..9bc0c5d2 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -78,13 +78,19 @@ custom_data: NReads: description: "The number of reads sequenced" format: "{:,.0f}" - hidden: True + N_reads_surviving_blacklist: + description: "The number of reads surviving after filtering blacklisted regions" + format: "{:,.0f}" + hidden: true NMappedReads: description: "The number of reads mapped" format: "{:,.0f}" - hidden: True + N_mapped_reads_surviving_filter: + description: "The number of mapped reads surviving after filtering by alignment quality" + format: "{:,.0f}" + hidden: true NUniqMappedReads: - description: "The number of reads remaining after deduplication" + description: "The number of mapped & filtered reads remaining after deduplication" format: "{:,.0f}" NRF: description: "Non-Redundant fraction" diff --git a/bin/createtable.py b/bin/createtable.py index e0d730da..a7aab17c 100755 --- a/bin/createtable.py +++ b/bin/createtable.py @@ -33,10 +33,6 @@ def file2table(): df = pd.DataFrame(tabledict) df.index.name = "SampleName" df.reset_index(inplace=True) - # print(df[['NSC', 'FRiP', 'PCB1', 'PCB2', 'RSC']]) #re-order columns - # cols = df.columns.tolist() # view df columns names - # orderedcols = ordercolumns(cols) - # print(df.to_string()) # sometimes preseq fails, resulting in some columns not being present. # so this only keeps columns that exist in the dict. @@ -46,7 +42,9 @@ def file2table(): for col in [ "SampleName", "NReads", + "N_reads_surviving_blacklist", "NMappedReads", + "N_mapped_reads_surviving_filter", "NUniqMappedReads", "NRF", "PBC1", diff --git a/bin/filterMetrics.py b/bin/filterMetrics.py index 5007132f..38a7fe34 100755 --- a/bin/filterMetrics.py +++ b/bin/filterMetrics.py @@ -64,6 +64,8 @@ def getmetadata(type): metadata = "NReads" elif type == "mnreads": metadata = "NMappedReads" + elif type == "N_mapped_reads_surviving_filter": + metadata = type elif type == "unreads": metadata = "NUniqMappedReads" elif type == "fragLen": diff --git a/main.nf b/main.nf index 9ebead8c..a9812535 100644 --- a/main.nf +++ b/main.nf @@ -64,8 +64,8 @@ workflow { ch_multiqc = Channel.of() if (params.run.qc) { - QC(raw_fastqs, trimmed_fastqs, - aligned_bam, ALIGN_GENOME.out.flagstat, + QC(raw_fastqs, trimmed_fastqs, FILTER_BLACKLIST.out.n_surviving_reads, + aligned_bam, ALIGN_GENOME.out.aligned_flagstat, ALIGN_GENOME.out.filtered_flagstat, deduped_bam, DEDUPLICATE.out.flagstat, PHANTOM_PEAKS.out.spp, frag_lengths, PREPARE_GENOME.out.gene_info, diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf new file mode 100644 index 00000000..e029e4ac --- /dev/null +++ b/modules/CCBR/custom/countfastq/main.nf @@ -0,0 +1,25 @@ + +process CUSTOM_COUNTFASTQ { + tag { meta.id } + label 'process_single' + + container 'nciccbr/ccbr_ubuntu_base_20.04:v5' + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), env(count), emit: count + + script: + """ + count=`zcat ${fastq} | grep "^@" | wc -l` + echo \$count + """ + + stub: + """ + count=-1 + echo \$count + """ +} diff --git a/modules/CCBR/custom/countfastq/meta.yml b/modules/CCBR/custom/countfastq/meta.yml new file mode 100644 index 00000000..b3c0c13d --- /dev/null +++ b/modules/CCBR/custom/countfastq/meta.yml @@ -0,0 +1,39 @@ +name: custom_countfastq +description: | + Count reads in a fastq file + +keywords: + - fastq +tools: + - grep: + description: | + a command-line utility for searching plain-text data sets for lines that match a regular expression + - wc: + description: | + a command-line utility for counting newlines, words, and bytes in plain-text data + - zcat: + description: | + a command-line utility for viewing compressed file content as plain text +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: fastq file + pattern: "*.{fastq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - count: + type: env + description: Number of reads in the fastq files +authors: + - "@kelly-sovacool" +maintainers: + - "@kelly-sovacool" diff --git a/modules/CCBR/picard/samtofastq/main.nf b/modules/CCBR/picard/samtofastq/main.nf index 644f1e4d..13c1f01b 100644 --- a/modules/CCBR/picard/samtofastq/main.nf +++ b/modules/CCBR/picard/samtofastq/main.nf @@ -8,9 +8,10 @@ process PICARD_SAMTOFASTQ { tuple val(meta), path(bam) output: - tuple val(meta), path("*_?.fastq.gz"), emit: reads - path "versions.yml", emit: versions + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*_?.fastq.gz"), emit: paired, optional: true tuple val(meta), path("*unpaired.fastq.gz"), emit: unpaired, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/CCBR/samtools/flagstat/main.nf similarity index 95% rename from modules/nf-core/samtools/flagstat/main.nf rename to modules/CCBR/samtools/flagstat/main.nf index b75707ec..98f8e5df 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/CCBR/samtools/flagstat/main.nf @@ -19,7 +19,7 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${bam.baseName}" """ samtools \\ flagstat \\ diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/CCBR/samtools/flagstat/meta.yml similarity index 100% rename from modules/nf-core/samtools/flagstat/meta.yml rename to modules/CCBR/samtools/flagstat/meta.yml diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 8ab9f39b..158782d2 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -100,7 +100,7 @@ process HANDLE_PRESEQ_ERROR { tuple val(meta), val(log) output: - path("*nrf.txt"), emit: nrf + tuple val(meta), path("*nrf.txt"), emit: nrf script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -123,7 +123,7 @@ process PARSE_PRESEQ_LOG { tuple val(meta), path(log) output: - path("*nrf.txt"), emit: nrf + tuple val(meta), path("*nrf.txt"), emit: nrf script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -150,7 +150,7 @@ process PHANTOM_PEAKS { output: path("${meta.id}.ppqt.pdf") , emit: pdf - path("${meta.id}.spp.out") , emit: spp + tuple val(meta), path("${meta.id}.spp.out") , emit: spp tuple val(meta), path("${meta.id}.fraglen.txt"), emit: fraglen path "versions.yml" , emit: versions @@ -251,12 +251,7 @@ process QC_STATS { container = "${params.containers.base}" input: - tuple val(meta), path(raw_fastq) - tuple val(meta), path(align_flagstat) - tuple path(dedup_flagstat), path(idxstat) - path(preseq_nrf) - path(ppqt_spp) - tuple val(meta), val(fraglen) + tuple val(meta), path(raw_fastq), val(n_reads_after_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen) output: @@ -267,16 +262,28 @@ process QC_STATS { def outfile = "${meta.id}.qc_stats.txt" """ touch ${outfile} + # Number of reads zcat ${raw_fastq} | wc -l | filterMetrics.py ${meta.id} tnreads >> ${outfile} + + # Number of reads after blacklist filter + echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t${n_reads_after_blacklist}" >> ${outfile} + # Number of mapped reads - grep 'mapped (' ${align_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile} + grep 'mapped (' ${aligned_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile} + + # Number of mapped reads surviving filter + grep 'mapped (' ${filtered_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} N_mapped_reads_surviving_filter >> ${outfile} + # Number of uniquely mapped reads grep 'mapped (' ${dedup_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} unreads >> ${outfile} + # NRF, PCB1, PCB2 cat ${preseq_nrf} | filterMetrics.py ${meta.id} nrf >> ${outfile} + # NSC, RSC, Qtag awk '{{print \$(NF-2),\$(NF-1),\$NF}}' ${ppqt_spp} | filterMetrics.py ${meta.id} ppqt >> ${outfile} + # Fragment Length echo "${meta.id}\tFragmentLength\t${fraglen}" >> ${outfile} """ diff --git a/modules/local/samtools_index.nf b/modules/local/samtools_index.nf index 997041d3..f64f73cd 100644 --- a/modules/local/samtools_index.nf +++ b/modules/local/samtools_index.nf @@ -1,4 +1,4 @@ -process SAMTOOLS_INDEX { +process SAMTOOLS_INDEX { // TODO create/use flagstat & idxstat module in nf-modules tag { meta.id } label 'process_medium' @@ -9,7 +9,7 @@ process SAMTOOLS_INDEX { output: tuple val(meta), path("${bam.baseName}.sort.bam"), path("${bam.baseName}.sort.bam.bai"), emit: bam - tuple path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat + tuple val(meta), path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat script: """ diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf index 14494899..3438b260 100644 --- a/subworkflows/CCBR/filter_blacklist/main.nf +++ b/subworkflows/CCBR/filter_blacklist/main.nf @@ -1,8 +1,9 @@ -include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' +include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned' -include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq' +include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq' +include { CUSTOM_COUNTFASTQ } from '../../../modules/CCBR/custom/countfastq' workflow FILTER_BLACKLIST { take: @@ -15,6 +16,7 @@ workflow FILTER_BLACKLIST { BWA_MEM ( ch_fastq_input, ch_blacklist_index ) SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam ) PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam ) + CUSTOM_COUNTFASTQ( PICARD_SAMTOFASTQ.out.paired ) ch_versions = ch_versions.mix( BWA_MEM.out.versions, @@ -23,6 +25,7 @@ workflow FILTER_BLACKLIST { ) emit: - reads = PICARD_SAMTOFASTQ.out.reads // channel: [ val(meta), path(fastq) ] + reads = PICARD_SAMTOFASTQ.out.paired // channel: [ val(meta), path(fastq) ] + n_surviving_reads = CUSTOM_COUNTFASTQ.out.count versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/CCBR/filter_blacklist/meta.yml b/subworkflows/CCBR/filter_blacklist/meta.yml index 7eeb732c..df1797d1 100644 --- a/subworkflows/CCBR/filter_blacklist/meta.yml +++ b/subworkflows/CCBR/filter_blacklist/meta.yml @@ -11,15 +11,19 @@ components: - bwa/mem - samtools/filteraligned - picard/samtofastq + - custom/countfastq input: - ch_fastq_input: + type: map description: | A channel containing fastq files - ch_blacklist_index: + type: file description: | A BWA index created by running BWA/INDEX on a fasta file of blacklisted regions/ output: - reads: + type: map description: | Reads from the fastq files that do not align to the blacklist - versions: diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 2e446ade..e0a0385c 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -1,7 +1,8 @@ include { BWA_MEM } from "../../modules/CCBR/bwa/mem" include { FILTER_QUALITY } from "../../modules/local/align.nf" -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' -include { SAMTOOLS_SORT } from '../../modules/CCBR/samtools/sort/main' // TODO use ccbr samtools/sort +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_ALIGN + SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_FILTER } from '../../modules/CCBR/samtools/flagstat' +include { SAMTOOLS_SORT } from '../../modules/CCBR/samtools/sort' workflow ALIGN_GENOME { @@ -11,17 +12,19 @@ workflow ALIGN_GENOME { main: BWA_MEM(reads, reference) + SAMTOOLS_FLAGSTAT_ALIGN( BWA_MEM.out.bam ) FILTER_QUALITY( BWA_MEM.out.bam ) SAMTOOLS_SORT( FILTER_QUALITY.out.bam ) - SAMTOOLS_FLAGSTAT( SAMTOOLS_SORT.out.bam ) + SAMTOOLS_FLAGSTAT_FILTER( SAMTOOLS_SORT.out.bam ) ch_versions = Channel.empty().mix( BWA_MEM.out.versions, - SAMTOOLS_FLAGSTAT.out.versions + SAMTOOLS_FLAGSTAT_ALIGN.out.versions ) emit: - bam = FILTER_QUALITY.out.bam - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat - versions = ch_versions + bam = FILTER_QUALITY.out.bam + aligned_flagstat = SAMTOOLS_FLAGSTAT_ALIGN.out.flagstat + filtered_flagstat = SAMTOOLS_FLAGSTAT_FILTER.out.flagstat + versions = ch_versions } diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 2a377e35..4cf958d3 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -1,4 +1,3 @@ - include { FASTQC as FASTQC_RAW } from "../../modules/local/qc.nf" include { FASTQC as FASTQC_TRIMMED } from "../../modules/local/qc.nf" include { FASTQ_SCREEN } from "../../modules/local/qc.nf" @@ -23,8 +22,10 @@ workflow QC { take: raw_fastqs trimmed_fastqs - aligned_bam + n_reads_surviving_blacklist + aligned_filtered_bam aligned_flagstat + filtered_flagstat deduped_bam deduped_flagstat ppqt_spp @@ -43,10 +44,10 @@ workflow QC { type: 'dir', checkIfExists: true)) | FASTQ_SCREEN ch_multiqc = ch_multiqc.mix(FASTQ_SCREEN.out.screen) } - PRESEQ(aligned_bam) + PRESEQ(aligned_filtered_bam) // when preseq fails, write NAs for the stats that are calculated from its log PRESEQ.out.log - .join(aligned_bam, remainder: true) + .join(aligned_filtered_bam, remainder: true) .branch { meta, preseq_log, bam_tuple -> failed: preseq_log == null return (tuple(meta, "nopresqlog")) @@ -59,15 +60,17 @@ workflow QC { .concat(HANDLE_PRESEQ_ERROR.out.nrf) .set{ preseq_nrf } - QC_STATS( - raw_fastqs, - aligned_flagstat, - deduped_flagstat, - preseq_nrf, - ppqt_spp, - frag_lengths - ) - QC_TABLE(QC_STATS.out.collect()) + // TODO: order of items in channel is not guaranteed. Need to create single channel with all files for QC stats with same metadata + qc_stats_input = raw_fastqs + .join(n_reads_surviving_blacklist) + .join(aligned_flagstat) + .join(filtered_flagstat) + .join(deduped_flagstat) + .join(preseq_nrf) + .join(ppqt_spp) + .join(frag_lengths) + QC_STATS( qc_stats_input ) + QC_TABLE( QC_STATS.out.collect() ) // Deeptools @@ -105,12 +108,23 @@ workflow QC { } .set { ch_ip_ctrl_bigwig } + deduped_flagstat + .map { meta, flagstat, idxstat -> + [ flagstat, idxstat ] + } + .set{ dedup_flagstat_files } + ppqt_spp + .map { meta, spp -> + [ spp ] + } + .set{ ppqt_spp_files } + ch_multiqc = ch_multiqc.mix( FASTQC_RAW.out.zip, FASTQC_TRIMMED.out.zip, - deduped_flagstat, - ppqt_spp, - QC_TABLE.out, + dedup_flagstat_files, + ppqt_spp_files, + QC_TABLE.out.txt, PLOT_FINGERPRINT.out.matrix, PLOT_FINGERPRINT.out.metrics, PLOT_CORRELATION.out.tab, From 00ddb89f405c0320502d99cb219496b96684d40e Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 12:04:48 -0400 Subject: [PATCH 02/26] chore: prepare v0.2.1 --- CHANGELOG.md | 5 +++++ VERSION | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecdad8aa..2f02d3ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## CHAMPAGNE 0.2.1 + +- Fixed a bug in QC stats that mixed up the statistics for different samples. (#125) +- Report read counts between blacklist & filtering steps in the QC table. (#125) + ## CHAMPAGNE 0.2.0 ### New features diff --git a/VERSION b/VERSION index 70426f85..0c62199f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0-dev +0.2.1 From 578c0f56dba1c4221715e0b5c013b7aff7a4b26f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 13:35:52 -0400 Subject: [PATCH 03/26] feat: option to skip deeptools QC subworkflow --- nextflow.config | 1 + subworkflows/local/deeptools.nf | 60 ++++++++++++++++++++++++++ subworkflows/local/qc.nf | 74 ++++++++++----------------------- 3 files changed, 82 insertions(+), 53 deletions(-) create mode 100644 subworkflows/local/deeptools.nf diff --git a/nextflow.config b/nextflow.config index 7c74fe0c..f477f9dd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,7 @@ params { run { // some steps can be turned on/off for debugging purposes qc = true + deeptools = true normalize_input = true call_peaks = true gem = true diff --git a/subworkflows/local/deeptools.nf b/subworkflows/local/deeptools.nf new file mode 100644 index 00000000..d01fcd8a --- /dev/null +++ b/subworkflows/local/deeptools.nf @@ -0,0 +1,60 @@ + +include { BAM_COVERAGE } from "../../modules/local/deeptools.nf" +include { BIGWIG_SUM } from "../../modules/local/deeptools.nf" +include { BED_PROTEIN_CODING } from "../../modules/local/deeptools.nf" +include { COMPUTE_MATRIX } from "../../modules/local/deeptools.nf" +include { PLOT_FINGERPRINT } from "../../modules/local/deeptools.nf" +include { PLOT_CORRELATION } from "../../modules/local/deeptools.nf" +include { PLOT_PCA } from "../../modules/local/deeptools.nf" +include { PLOT_HEATMAP } from "../../modules/local/deeptools.nf" +include { PLOT_PROFILE } from "../../modules/local/deeptools.nf" + +workflow DEEPTOOLS { + take: + deduped_bam + frag_lengths + effective_genome_size + + main: + + deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE + BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list } + BIGWIG_SUM(bigwig_list) + BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION + BIGWIG_SUM.out.array | PLOT_PCA + + // Create channel: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] + deduped_bam + .combine(deduped_bam) + .map { + meta1, bam1, bai1, meta2, bam2, bai2 -> + meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null + } + .set { ch_ip_ctrl_bam_bai } + ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT + gene_info | BED_PROTEIN_CODING + COMPUTE_MATRIX(bigwig_list, + BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS')) + ) + PLOT_HEATMAP(COMPUTE_MATRIX.out.mat) + PLOT_PROFILE(COMPUTE_MATRIX.out.mat) + + // Create channel: [ meta, ip_bw, control_bw ] + BAM_COVERAGE.out.meta + .merge(BAM_COVERAGE.out.bigwig) + .set { bigwigs } + bigwigs + .combine(bigwigs) + .map { + meta1, bw1, meta2, bw2 -> + meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null + } + .set { ch_ip_ctrl_bigwig } + emit: + bigwig = ch_ip_ctrl_bigwig + fingerprint_matrix = PLOT_FINGERPRINT.out.matrix, + fingerprint_metrics = PLOT_FINGERPRINT.out.metrics, + corr = PLOT_CORRELATION.out.tab, + pca = PLOT_PCA.out.tab, + profile = PLOT_PROFILE.out.tab +} diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 4cf958d3..383f6f3f 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -1,3 +1,4 @@ +// modules include { FASTQC as FASTQC_RAW } from "../../modules/local/qc.nf" include { FASTQC as FASTQC_TRIMMED } from "../../modules/local/qc.nf" include { FASTQ_SCREEN } from "../../modules/local/qc.nf" @@ -8,15 +9,8 @@ include { QC_STATS } from "../../modules/local/qc.nf" include { QC_TABLE } from "../../modules/local/qc.nf" include { MULTIQC } from "../../modules/local/qc.nf" -include { BAM_COVERAGE } from "../../modules/local/deeptools.nf" -include { BIGWIG_SUM } from "../../modules/local/deeptools.nf" -include { BED_PROTEIN_CODING } from "../../modules/local/deeptools.nf" -include { COMPUTE_MATRIX } from "../../modules/local/deeptools.nf" -include { PLOT_FINGERPRINT } from "../../modules/local/deeptools.nf" -include { PLOT_CORRELATION } from "../../modules/local/deeptools.nf" -include { PLOT_PCA } from "../../modules/local/deeptools.nf" -include { PLOT_HEATMAP } from "../../modules/local/deeptools.nf" -include { PLOT_PROFILE } from "../../modules/local/deeptools.nf" +// subworkflows +include { DEEPTOOLS } from "../../subworkflows/local/deeptools.nf" workflow QC { take: @@ -72,42 +66,6 @@ workflow QC { QC_STATS( qc_stats_input ) QC_TABLE( QC_STATS.out.collect() ) - // Deeptools - - deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE - BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list } - BIGWIG_SUM(bigwig_list) - BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION - BIGWIG_SUM.out.array | PLOT_PCA - - // Create channel: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] - deduped_bam - .combine(deduped_bam) - .map { - meta1, bam1, bai1, meta2, bam2, bai2 -> - meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null - } - .set { ch_ip_ctrl_bam_bai } - ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT - gene_info | BED_PROTEIN_CODING - COMPUTE_MATRIX(bigwig_list, - BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS')) - ) - PLOT_HEATMAP(COMPUTE_MATRIX.out.mat) - PLOT_PROFILE(COMPUTE_MATRIX.out.mat) - - // Create channel: [ meta, ip_bw, control_bw ] - BAM_COVERAGE.out.meta - .merge(BAM_COVERAGE.out.bigwig) - .set { bigwigs } - bigwigs - .combine(bigwigs) - .map { - meta1, bw1, meta2, bw2 -> - meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null - } - .set { ch_ip_ctrl_bigwig } - deduped_flagstat .map { meta, flagstat, idxstat -> [ flagstat, idxstat ] @@ -118,22 +76,32 @@ workflow QC { [ spp ] } .set{ ppqt_spp_files } - ch_multiqc = ch_multiqc.mix( FASTQC_RAW.out.zip, FASTQC_TRIMMED.out.zip, dedup_flagstat_files, ppqt_spp_files, - QC_TABLE.out.txt, - PLOT_FINGERPRINT.out.matrix, - PLOT_FINGERPRINT.out.metrics, - PLOT_CORRELATION.out.tab, - PLOT_PCA.out.tab, - PLOT_PROFILE.out.tab + QC_TABLE.out.txt ) + ch_ip_ctrl_bigwig = Channel.empty() + if (params.run.deeptools) { + DEEPTOOLS( deduped_bam, + frag_lengths, + effective_genome_size + ) + ch_ip_ctrl_bigwig = DEEPTOOLS.out.bigwig + ch_multiqc = ch_multiqc.mix( + DEEPTOOLS.out.fingerprint_matrix, + DEEPTOOLS.out.fingerprint_metrics, + DEEPTOOLS.out.corr, + DEEPTOOLS.out.pca, + DEEPTOOLS.out.profile + ) + } + emit: - bigwigs = ch_ip_ctrl_bigwig + bigwigs = ch_ip_ctrl_bigwig multiqc_input = ch_multiqc } From 194d7005397600a96c83bfce80dc37cb07cda86d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 19:09:12 -0400 Subject: [PATCH 04/26] fix: move withName selectors to modules.config --- conf/base.config | 5 ----- conf/modules.config | 21 +++++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/conf/base.config b/conf/base.config index 6c7a4a17..b99078a6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -59,10 +58,6 @@ process { errorStrategy = 'retry' maxRetries = 2 } - /* - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - }*/ // Custom CCBR resource requirements withLabel:process_higher { diff --git a/conf/modules.config b/conf/modules.config index 2496842b..536a55f6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -8,20 +8,21 @@ process { errorStrategy = 'finish' - withName: 'INPUT_CHECK:SAMPLESHEET_CHECK' { + /* + withName: '.*CUSTOM_DUMPSOFTWAREVERSIONS' { + cache = false + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + }*/ + + withName: '.*INPUT_CHECK:SAMPLESHEET_CHECK' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: 'CUTADAPT' { - ext.args = [ - '--nextseq-trim=2', - '--trim-n -n 5 -O 5', - '-q 10,10', - '-m 20', - '-b file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa' - ].join(' ').trim() - } } From 7685049160fccea4cf9675456011b754eb258542 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 19:19:58 -0400 Subject: [PATCH 05/26] fix: deeptools subwf syntax --- conf/test.config | 1 + main.nf | 4 ++++ subworkflows/local/deeptools.nf | 10 ++++++---- subworkflows/local/qc.nf | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/conf/test.config b/conf/test.config index 8a1d203c..f1d813c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,6 +18,7 @@ params { deeptools.excluded_chroms = 'chrM' run { qc = true + deeptools = true normalize_input = true call_peaks = true gem = true diff --git a/main.nf b/main.nf index a9812535..4c3d1be1 100644 --- a/main.nf +++ b/main.nf @@ -41,6 +41,10 @@ workflow MAKE_REFERENCE { // MAIN WORKFLOW workflow { + CHIPSEQ() +} + +workflow CHIPSEQ { INPUT_CHECK(file(params.input), params.seq_center) INPUT_CHECK.out.reads.set { raw_fastqs } raw_fastqs | CUTADAPT diff --git a/subworkflows/local/deeptools.nf b/subworkflows/local/deeptools.nf index d01fcd8a..da601911 100644 --- a/subworkflows/local/deeptools.nf +++ b/subworkflows/local/deeptools.nf @@ -14,6 +14,7 @@ workflow DEEPTOOLS { deduped_bam frag_lengths effective_genome_size + gene_info main: @@ -50,11 +51,12 @@ workflow DEEPTOOLS { meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null } .set { ch_ip_ctrl_bigwig } + emit: bigwig = ch_ip_ctrl_bigwig - fingerprint_matrix = PLOT_FINGERPRINT.out.matrix, - fingerprint_metrics = PLOT_FINGERPRINT.out.metrics, - corr = PLOT_CORRELATION.out.tab, - pca = PLOT_PCA.out.tab, + fingerprint_matrix = PLOT_FINGERPRINT.out.matrix + fingerprint_metrics = PLOT_FINGERPRINT.out.metrics + corr = PLOT_CORRELATION.out.tab + pca = PLOT_PCA.out.tab profile = PLOT_PROFILE.out.tab } diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 383f6f3f..3479ded9 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -88,7 +88,8 @@ workflow QC { if (params.run.deeptools) { DEEPTOOLS( deduped_bam, frag_lengths, - effective_genome_size + effective_genome_size, + gene_info ) ch_ip_ctrl_bigwig = DEEPTOOLS.out.bigwig ch_multiqc = ch_multiqc.mix( From 78249174ce2c07126fc7db464c4835097eedc3e7 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 19:33:15 -0400 Subject: [PATCH 06/26] test: move cli-specific test to separate subdir --- .github/workflows/build.yml | 8 ++++---- nextflow.config | 6 ------ tests/.gitignore | 7 ------- tests/cli/.gitignore | 4 ++++ {conf => tests/cli}/ci_stub.config | 0 {conf => tests/cli}/ci_test.config | 0 {assets => tests/cli}/fastq_screen_ci.conf | 0 7 files changed, 8 insertions(+), 17 deletions(-) delete mode 100644 tests/.gitignore create mode 100644 tests/cli/.gitignore rename {conf => tests/cli}/ci_stub.config (100%) rename {conf => tests/cli}/ci_test.config (100%) rename {assets => tests/cli}/fastq_screen_ci.conf (100%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de1e513e..4314b5ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,16 +41,16 @@ jobs: pip install .[dev,test] - name: Stub run run: | - cd tests/ + cd tests/cli which champagne champagne init - champagne run -profile ci_stub -stub + champagne run -stub -c ci_stub - name: Test run if: ${{ env.test_run == 'true' }} run: | - cd tests/ + cd tests/cli champagne init - champagne run -profile ci_test,docker + champagne run -profile docker -c ci_test.config - name: "Upload Artifact" uses: actions/upload-artifact@v3 if: always() # run even if previous steps fail diff --git a/nextflow.config b/nextflow.config index f477f9dd..34f9a3c2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -113,12 +113,6 @@ profiles { full_mm10 { includeConfig "conf/full_mm10.config" } - ci_stub { - includeConfig "conf/ci_stub.config" - } - ci_test { - includeConfig "conf/ci_test.config" - } } includeConfig 'conf/genomes.config' diff --git a/tests/.gitignore b/tests/.gitignore deleted file mode 100644 index 29db90be..00000000 --- a/tests/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -assets/ -conf/ -results/ -work/ - -nextflow.config -submit_slurm.sh diff --git a/tests/cli/.gitignore b/tests/cli/.gitignore new file mode 100644 index 00000000..810a465d --- /dev/null +++ b/tests/cli/.gitignore @@ -0,0 +1,4 @@ +/assets/ +/conf/ +/nextflow.config +/submit_slurm.sh diff --git a/conf/ci_stub.config b/tests/cli/ci_stub.config similarity index 100% rename from conf/ci_stub.config rename to tests/cli/ci_stub.config diff --git a/conf/ci_test.config b/tests/cli/ci_test.config similarity index 100% rename from conf/ci_test.config rename to tests/cli/ci_test.config diff --git a/assets/fastq_screen_ci.conf b/tests/cli/fastq_screen_ci.conf similarity index 100% rename from assets/fastq_screen_ci.conf rename to tests/cli/fastq_screen_ci.conf From 59d52bcd9dcca089449d084720507d6d02934904 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Oct 2023 21:43:12 -0400 Subject: [PATCH 07/26] fix: ignore output dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1905fa74..b504e6de 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ replay_pid* /work*/ /data/ /results/ +/output/ /params.yaml # python packaging From 30e28201a829a0eac39f0f730c0e63ea66dc9239 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 30 Oct 2023 15:59:49 -0400 Subject: [PATCH 08/26] style: sort df rows by sample name --- bin/createtable.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/createtable.py b/bin/createtable.py index a7aab17c..b14314d1 100755 --- a/bin/createtable.py +++ b/bin/createtable.py @@ -33,6 +33,7 @@ def file2table(): df = pd.DataFrame(tabledict) df.index.name = "SampleName" df.reset_index(inplace=True) + df = df.sort_values(by="SampleName") # sometimes preseq fails, resulting in some columns not being present. # so this only keeps columns that exist in the dict. From 3b4be6a0ff4809870fa78417cf61205105ef00bd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 30 Oct 2023 16:01:41 -0400 Subject: [PATCH 09/26] fix: increase default resources across process selectors --- conf/base.config | 17 +++++------------ modules/CCBR/bwa/mem/main.nf | 2 +- modules/local/deduplicate.nf | 2 +- modules/local/deeptools.nf | 8 ++++---- modules/local/qc.nf | 3 +-- 5 files changed, 12 insertions(+), 20 deletions(-) diff --git a/conf/base.config b/conf/base.config index b99078a6..c5251257 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,19 +31,19 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } + cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 32 * task.attempt, 'cpus' ) } + memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } @@ -58,11 +58,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - - // Custom CCBR resource requirements - withLabel:process_higher { - cpus = { check_max( 32 * task.attempt, 'cpus' ) } - memory = { check_max( 120.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } } diff --git a/modules/CCBR/bwa/mem/main.nf b/modules/CCBR/bwa/mem/main.nf index fe70049f..e9fa59f8 100644 --- a/modules/CCBR/bwa/mem/main.nf +++ b/modules/CCBR/bwa/mem/main.nf @@ -1,6 +1,6 @@ process BWA_MEM { tag { meta.id } - label 'process_higher' + label 'process_high' container 'nciccbr/ccbr_ubuntu_base_20.04:v5' diff --git a/modules/local/deduplicate.nf b/modules/local/deduplicate.nf index 7d574a7a..5153ff19 100644 --- a/modules/local/deduplicate.nf +++ b/modules/local/deduplicate.nf @@ -44,7 +44,7 @@ process MACS2_DEDUP { process PICARD_DEDUP { tag { meta.id } label 'dedup' - label 'process_higher' + label 'process_high' container "${params.containers.picard}" diff --git a/modules/local/deeptools.nf b/modules/local/deeptools.nf index 42bd5a48..3a5db905 100644 --- a/modules/local/deeptools.nf +++ b/modules/local/deeptools.nf @@ -3,7 +3,7 @@ process BAM_COVERAGE { tag { meta.id } label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -125,7 +125,7 @@ process PLOT_PCA { process PLOT_FINGERPRINT { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -187,7 +187,7 @@ process BED_PROTEIN_CODING { process COMPUTE_MATRIX { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -303,7 +303,7 @@ process PLOT_PROFILE { process NORMALIZE_INPUT { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 158782d2..5f844ca4 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -2,7 +2,7 @@ process FASTQC { tag { meta.id } label 'qc' - label 'process_higher' + label 'process_high' publishDir "${params.outdir}/qc/fastqc_${fqtype}/${meta.id}", mode: "${params.publish_dir_mode}" container = "${params.containers.fastqc}" @@ -258,7 +258,6 @@ process QC_STATS { path("${meta.id}.qc_stats.txt") script: - // TODO: handle paired reads def outfile = "${meta.id}.qc_stats.txt" """ touch ${outfile} From 86dcab1188154119cdde3c1b15a70b570aac9c53 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 09:37:40 -0400 Subject: [PATCH 10/26] fix: increase memory for samtools sort --- modules/CCBR/samtools/sort/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/CCBR/samtools/sort/main.nf b/modules/CCBR/samtools/sort/main.nf index 359b871d..5cf9e8d5 100644 --- a/modules/CCBR/samtools/sort/main.nf +++ b/modules/CCBR/samtools/sort/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_SORT { tag { meta.id } - label 'process_medium' + label 'process_high' container 'nciccbr/ccbr_ubuntu_base_20.04:v6' From fee3d57435f524cdfdaf58cacb6230060e79f369 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 10:30:10 -0400 Subject: [PATCH 11/26] fix: correct fastq count method --- modules/CCBR/custom/countfastq/main.nf | 10 ++++------ .../countfastq/templates/count-fastq.py | 19 +++++++++++++++++++ modules/local/qc.nf | 5 +++-- subworkflows/local/qc.nf | 1 - 4 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 modules/CCBR/custom/countfastq/templates/count-fastq.py diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf index e029e4ac..ddaf682b 100644 --- a/modules/CCBR/custom/countfastq/main.nf +++ b/modules/CCBR/custom/countfastq/main.nf @@ -3,19 +3,17 @@ process CUSTOM_COUNTFASTQ { tag { meta.id } label 'process_single' - container 'nciccbr/ccbr_ubuntu_base_20.04:v5' + container 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' input: tuple val(meta), path(fastq) output: - tuple val(meta), env(count), emit: count + tuple val(meta), path("*.txt"), emit: count script: - """ - count=`zcat ${fastq} | grep "^@" | wc -l` - echo \$count - """ + def txt_filename = "${meta.baseName}.txt" + template 'count-fastq.py' stub: """ diff --git a/modules/CCBR/custom/countfastq/templates/count-fastq.py b/modules/CCBR/custom/countfastq/templates/count-fastq.py new file mode 100644 index 00000000..d83ee8fa --- /dev/null +++ b/modules/CCBR/custom/countfastq/templates/count-fastq.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +import Bio.SeqIO +import gzip +import os + + +def main(): + count = 0 + for fastq_filename in "${fastq}".split(): + with gzip.open(fastq_filename, "rt") as file_handle: + n_seqs = sum(1 for rec in Bio.SeqIO.parse(file_handle, "fastq")) + count += n_seqs + with open("${meta.id}.count.txt", "w") as out_file: + out_file.write(str(count)) + return count + + +if __name__ == "__main__": + print(main()) diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 5f844ca4..576703fe 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -251,7 +251,7 @@ process QC_STATS { container = "${params.containers.base}" input: - tuple val(meta), path(raw_fastq), val(n_reads_after_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen) + tuple val(meta), path(raw_fastq), path(count_file_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen) output: @@ -266,7 +266,8 @@ process QC_STATS { zcat ${raw_fastq} | wc -l | filterMetrics.py ${meta.id} tnreads >> ${outfile} # Number of reads after blacklist filter - echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t${n_reads_after_blacklist}" >> ${outfile} + n_reads_after_blacklist=`cat ${count_file_blacklist}` + echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t\${n_reads_after_blacklist}" >> ${outfile} # Number of mapped reads grep 'mapped (' ${aligned_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile} diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 3479ded9..75e6975c 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -54,7 +54,6 @@ workflow QC { .concat(HANDLE_PRESEQ_ERROR.out.nrf) .set{ preseq_nrf } - // TODO: order of items in channel is not guaranteed. Need to create single channel with all files for QC stats with same metadata qc_stats_input = raw_fastqs .join(n_reads_surviving_blacklist) .join(aligned_flagstat) From e1bccec04395386a2f9f5d97d165a0792e581341 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 16:16:31 -0400 Subject: [PATCH 12/26] feat: add script to compare tables used for comparing old vs new pipeline stats (#122) --- bin/compare-tables.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 bin/compare-tables.R diff --git a/bin/compare-tables.R b/bin/compare-tables.R new file mode 100644 index 00000000..d8214588 --- /dev/null +++ b/bin/compare-tables.R @@ -0,0 +1,15 @@ +library(tidyverse) + +original <- read.table("QCTable.txt", header = TRUE) %>% + as_tibble() %>% + mutate(across(contains("reads"), as.integer)) %>% + select(c("SampleName", contains("reads"))) %>% + pivot_longer(-SampleName, values_to = "value_orig") +new <- read_tsv("qc_table.tsv") %>% + select(SampleName, original %>% pull(name)) %>% + pivot_longer(-SampleName, values_to = "value_new") + + +inner_join(original, new) %>% + mutate(rel_diff_percent = round(100 * (value_new - value_orig) / value_orig, 2)) %>% + View() From e8847bc41e64c4cab4b2da3bb6bbd117ab3e5fcd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 20:25:40 -0400 Subject: [PATCH 13/26] fix: increase resources for samtools index --- modules/local/samtools_index.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/samtools_index.nf b/modules/local/samtools_index.nf index f64f73cd..d4848313 100644 --- a/modules/local/samtools_index.nf +++ b/modules/local/samtools_index.nf @@ -1,6 +1,6 @@ process SAMTOOLS_INDEX { // TODO create/use flagstat & idxstat module in nf-modules tag { meta.id } - label 'process_medium' + label 'process_high' container = "${params.containers.base}" From 8f117e69a66cbabbb8bb83e2929be7930d1024d2 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 20:26:02 -0400 Subject: [PATCH 14/26] fix: write table as tsv --- bin/createtable.py | 4 ++-- modules/local/qc.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/createtable.py b/bin/createtable.py index b14314d1..a4ab4845 100755 --- a/bin/createtable.py +++ b/bin/createtable.py @@ -57,8 +57,8 @@ def file2table(): ] if col in df_columns ] - - print(df[column_order].to_string(index=False, justify="left")) + df = df[column_order] + df.to_csv("qc_table.tsv", sep="\t", index=False) if __name__ == "__main__": diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 576703fe..12978425 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -304,11 +304,11 @@ process QC_TABLE { path(qc_stats) output: - path("qc_table.txt"), emit: txt + path("qc_table.tsv"), emit: txt script: """ - cat ${qc_stats.join(' ')} | createtable.py > qc_table.txt + cat ${qc_stats.join(' ')} | createtable.py """ stub: From ce264b783fb40c21ab7fa4aa7adadbd4813d81d6 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 21:09:10 -0400 Subject: [PATCH 15/26] feat: count # of peaks --- bin/count-peaks.R | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 bin/count-peaks.R diff --git a/bin/count-peaks.R b/bin/count-peaks.R new file mode 100644 index 00000000..5bc93385 --- /dev/null +++ b/bin/count-peaks.R @@ -0,0 +1,25 @@ +library(tidyverse) +peak_counts <- read_tsv("peak_meta.tsv") %>% + group_by(sample_id, tool) %>% + count() %>% + rename(count_new = n) +peak_counts %>% + pull(tool) %>% + unique() + +peaks_old <- read_tsv("old_peak_counts.tsv") %>% + mutate(tool = str_remove(file, "/.*")) %>% + mutate( + tool = case_when( + tool == "macsBroad" ~ "macs_broad", + tool == "macsNarrow" ~ "macs_narrow", + TRUE ~ tool + ), + sample_id = str_replace(file, ".*/(.*)/.*", "\\1"), + ) %>% + rename(count_old = count) %>% + select(sample_id, tool, count_old) + +inner_join(peaks_old, peak_counts) %>% + mutate(rel_diff_percent = round(100 * (count_new - count_old) / count_old, 2)) %>% + View() From 2ea281163dd3e0a103186efb3e367ca77139e873 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Oct 2023 21:10:44 -0400 Subject: [PATCH 16/26] fix: renamed qc_table.txt to tsv --- assets/multiqc_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 9bc0c5d2..2e921b59 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -169,7 +169,7 @@ custom_data: sp: QC_Table: - fn: "qc_table.txt" + fn: "qc_table.tsv" NGSQC_data: fn: "*NGSQC.txt" frip_samples: From a55efcb6bdebf082defface235365b1ab74034d8 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 14:18:47 -0400 Subject: [PATCH 17/26] chore: update modules & submodules from remotes --- modules.json | 32 +++++------ .../nf-core/bedtools/getfasta/environment.yml | 6 +++ modules/nf-core/bedtools/getfasta/main.nf | 2 +- modules/nf-core/bwa/index/main.nf | 53 ------------------- modules/nf-core/bwa/index/meta.yml | 45 ---------------- modules/nf-core/bwa/index/tests/main.nf.test | 33 ------------ .../nf-core/bwa/index/tests/main.nf.test.snap | 43 --------------- modules/nf-core/bwa/index/tests/tags.yml | 2 - subworkflows/CCBR/filter_blacklist/main.nf | 7 +-- 9 files changed, 27 insertions(+), 196 deletions(-) create mode 100644 modules/nf-core/bedtools/getfasta/environment.yml delete mode 100644 modules/nf-core/bwa/index/main.nf delete mode 100644 modules/nf-core/bwa/index/meta.yml delete mode 100644 modules/nf-core/bwa/index/tests/main.nf.test delete mode 100644 modules/nf-core/bwa/index/tests/main.nf.test.snap delete mode 100644 modules/nf-core/bwa/index/tests/tags.yml diff --git a/modules.json b/modules.json index f0470703..3232c12b 100644 --- a/modules.json +++ b/modules.json @@ -12,8 +12,13 @@ }, "bwa/mem": { "branch": "main", - "git_sha": "ca4f84b4c2ca84eb0449b4ba414a8b8052f8d90a", - "installed_by": ["filter_blacklist", "modules"] + "git_sha": "7887b0e0dc5a0320d8ba84c2763ef8692c358087", + "installed_by": ["modules", "filter_blacklist"] + }, + "custom/countfastq": { + "branch": "main", + "git_sha": "2ccd43e3734de30fe61ed0ff80e6e3252929505e", + "installed_by": ["filter_blacklist"] }, "cutadapt": { "branch": "main", @@ -27,7 +32,7 @@ }, "picard/samtofastq": { "branch": "main", - "git_sha": "258d0f336ea1f851ab4223d295bb18b6dc187899", + "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a", "installed_by": ["filter_blacklist"] }, "samtools/filteraligned": { @@ -35,9 +40,14 @@ "git_sha": "879e969c593ab9f321301ac15722728ab30cea49", "installed_by": ["filter_blacklist"] }, + "samtools/flagstat": { + "branch": "main", + "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a", + "installed_by": ["modules"] + }, "samtools/sort": { "branch": "main", - "git_sha": "d55ab2580b69a81aa0534a3018cc6e6ea3b28640", + "git_sha": "5b39869abfc740c6243d18a3cd84aa7d78787125", "installed_by": ["modules"] } } @@ -46,7 +56,7 @@ "CCBR": { "filter_blacklist": { "branch": "main", - "git_sha": "bb7dbb42afe47d7e02b2f21e3352720ca2996e11", + "git_sha": "b7764378fac18bea8c84f9dd39cb595241b6e796", "installed_by": ["subworkflows"] } } @@ -57,17 +67,7 @@ "nf-core": { "bedtools/getfasta": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["modules"] - }, - "bwa/index": { - "branch": "master", - "git_sha": "28a23ea6529caff44855c774f439a4074883027c", - "installed_by": ["modules"] - }, - "samtools/flagstat": { - "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] } } diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml new file mode 100644 index 00000000..55ce727a --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.30.0 diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf index 84adc4c9..53982e11 100644 --- a/modules/nf-core/bedtools/getfasta/main.nf +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -2,7 +2,7 @@ process BEDTOOLS_GETFASTA { tag "$bed" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf deleted file mode 100644 index c30d194d..00000000 --- a/modules/nf-core/bwa/index/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process BWA_INDEX { - tag "$fasta" - label 'process_single' - - conda "bioconda::bwa=0.7.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'biocontainers/bwa:0.7.17--hed695b0_7' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path(bwa) , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${fasta.baseName}" - def args = task.ext.args ?: '' - """ - mkdir bwa - bwa \\ - index \\ - $args \\ - -p bwa/${prefix} \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${fasta.baseName}" - """ - mkdir bwa - - touch bwa/${prefix}.amb - touch bwa/${prefix}.ann - touch bwa/${prefix}.bwt - touch bwa/${prefix}.pac - touch bwa/${prefix}.sa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml deleted file mode 100644 index 730628d0..00000000 --- a/modules/nf-core/bwa/index/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: bwa_index -description: Create BWA index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input genome fasta file -output: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "*.{amb,ann,bwt,pac,sa}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@maxulysse" -maintainers: - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test deleted file mode 100644 index 2f33c0e8..00000000 --- a/modules/nf-core/bwa/index/tests/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process BWA_INDEX" - tag "modules_nfcore" - tag "modules" - tag "bwa" - tag "bwa/index" - script "../main.nf" - process "BWA_INDEX" - - test("BWA index") { - - when { - process { - """ - input[0] = [ - [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap deleted file mode 100644 index 492d8f6a..00000000 --- a/modules/nf-core/bwa/index/tests/main.nf.test.snap +++ /dev/null @@ -1,43 +0,0 @@ -{ - "BWA index": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - [ - "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", - "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", - "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", - "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", - "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" - ] - ] - ], - "1": [ - "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" - ], - "index": [ - [ - { - "id": "test" - }, - [ - "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", - "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", - "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", - "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", - "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" - ] - ] - ], - "versions": [ - "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" - ] - } - ], - "timestamp": "2023-10-17T17:20:20.180927714" - } -} diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml deleted file mode 100644 index 28bb483c..00000000 --- a/modules/nf-core/bwa/index/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bwa/index: - - modules/nf-core/bwa/index/** diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf index 3438b260..4c11dc7e 100644 --- a/subworkflows/CCBR/filter_blacklist/main.nf +++ b/subworkflows/CCBR/filter_blacklist/main.nf @@ -21,11 +21,12 @@ workflow FILTER_BLACKLIST { ch_versions = ch_versions.mix( BWA_MEM.out.versions, SAMTOOLS_FILTERALIGNED.out.versions, - PICARD_SAMTOFASTQ.out.versions + PICARD_SAMTOFASTQ.out.versions, + CUSTOM_COUNTFASTQ.out.versions ) emit: - reads = PICARD_SAMTOFASTQ.out.paired // channel: [ val(meta), path(fastq) ] + reads = PICARD_SAMTOFASTQ.out.paired // channel: [ val(meta), path(fastq) ] n_surviving_reads = CUSTOM_COUNTFASTQ.out.count - versions = ch_versions // channel: [ path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } From 690415296aed828fd1dd1d1224a134b7d00c688e Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 14:21:47 -0400 Subject: [PATCH 18/26] fix(cli): only set -profile flag if values aren't empty --- src/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util.py b/src/util.py index c4e79851..42b860c8 100644 --- a/src/util.py +++ b/src/util.py @@ -178,7 +178,8 @@ def run_nextflow( profiles.add("slurm") if hpc: profiles.add(hpc_options[hpc]["profile"]) - args_dict["-profile"] = ",".join(sorted(profiles)) + if profiles: + args_dict["-profile"] = ",".join(sorted(profiles)) nextflow_command += list(f"{k} {v}" for k, v in args_dict.items()) # Print nextflow command From 332b7bafd6a2b1af21c8f750534a04e8e5ea84fc Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 14:25:34 -0400 Subject: [PATCH 19/26] ci: fix typo in config filename --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4314b5ab..48d48bf0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,7 +44,7 @@ jobs: cd tests/cli which champagne champagne init - champagne run -stub -c ci_stub + champagne run -stub -c ci_stub.config - name: Test run if: ${{ env.test_run == 'true' }} run: | From 798d50bca89a4c84fd22accef7eb966e819b851f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 14:36:24 -0400 Subject: [PATCH 20/26] fix: update countfastq module from remote --- modules/CCBR/custom/countfastq/main.nf | 8 ++++-- modules/CCBR/custom/countfastq/meta.yml | 26 ++++++++++--------- .../countfastq/templates/count-fastq.py | 12 +++++++-- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf index ddaf682b..18df8c37 100644 --- a/modules/CCBR/custom/countfastq/main.nf +++ b/modules/CCBR/custom/countfastq/main.nf @@ -10,14 +10,18 @@ process CUSTOM_COUNTFASTQ { output: tuple val(meta), path("*.txt"), emit: count + path('versions.yml'), emit: versions + + when: + task.ext.when == null || task.ext.when script: - def txt_filename = "${meta.baseName}.txt" template 'count-fastq.py' stub: """ count=-1 - echo \$count + echo \$count > ${meta.id}.count.txt + touch versions.yml """ } diff --git a/modules/CCBR/custom/countfastq/meta.yml b/modules/CCBR/custom/countfastq/meta.yml index b3c0c13d..0e1cda48 100644 --- a/modules/CCBR/custom/countfastq/meta.yml +++ b/modules/CCBR/custom/countfastq/meta.yml @@ -1,19 +1,17 @@ name: custom_countfastq description: | Count reads in a fastq file - keywords: - fastq + - biopython + - python tools: - - grep: - description: | - a command-line utility for searching plain-text data sets for lines that match a regular expression - - wc: - description: | - a command-line utility for counting newlines, words, and bytes in plain-text data - - zcat: - description: | - a command-line utility for viewing compressed file content as plain text + - Biopython: + description: | + Python tools for computational molecular biology + homepage: https://biopython.org/ + tool_dev_url: https://github.com/biopython/biopython + doi: 10.1093/bioinformatics/btp163 input: - meta: type: map @@ -31,8 +29,12 @@ output: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - count: - type: env - description: Number of reads in the fastq files + type: file + description: Plain text file containing the number of reads in the fastq files + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@kelly-sovacool" maintainers: diff --git a/modules/CCBR/custom/countfastq/templates/count-fastq.py b/modules/CCBR/custom/countfastq/templates/count-fastq.py index d83ee8fa..1f89b1ae 100644 --- a/modules/CCBR/custom/countfastq/templates/count-fastq.py +++ b/modules/CCBR/custom/countfastq/templates/count-fastq.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import Bio.SeqIO import gzip -import os +import platform def main(): @@ -15,5 +15,13 @@ def main(): return count +def write_versions(): + with open("versions.yml", "w") as outfile: + outfile.write('"${task.process}":\\n') + outfile.write(f' Python: "{platform.python_version()}"\\n') + outfile.write(f' Biopython: "{Bio.__version__}"\\n') + + if __name__ == "__main__": - print(main()) + write_versions() + main() From a72f2e3c1746014fa60c69518b45ac7edcd710b1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 14:42:42 -0400 Subject: [PATCH 21/26] ci: fix test/stub config paths --- tests/cli/ci_stub.config | 18 +++++++++--------- tests/cli/ci_test.config | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/cli/ci_stub.config b/tests/cli/ci_stub.config index 2ba83f49..baaf26ee 100644 --- a/tests/cli/ci_stub.config +++ b/tests/cli/ci_stub.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'test' max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources @@ -13,19 +13,19 @@ params { publish_dir_mode = "symlink" // CCBR shared resource paths - index_dir = "data" + index_dir = "../data" fastq_screen { - conf = "assets/fastq_screen_ci.conf" - db_dir = "data/fastq_screen_db" + conf = "fastq_screen_ci.conf" + db_dir = "../data/fastq_screen_db" } genomes { 'test' { // blank files for testing stubs on GitHub Actions - blacklist_index = "data/test.blacklist" - reference_index = "data/test/*" + blacklist_index = "${params.index_dir}/test.blacklist" + reference_index = "${params.index_dir}/test/*" effective_genome_size = 2700000000 - chrom_sizes = "data/test.fa.sizes" - gene_info = "data/geneinfo.bed" - chromosomes_dir = "data/chroms/" + chrom_sizes = "${params.index_dir}/test.fa.sizes" + gene_info = "${params.index_dir}/geneinfo.bed" + chromosomes_dir = "${params.index_dir}/chroms/" } } sicer { diff --git a/tests/cli/ci_test.config b/tests/cli/ci_test.config index f262c176..ef47eaff 100644 --- a/tests/cli/ci_test.config +++ b/tests/cli/ci_test.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'custom_genome' read_length = 50 @@ -11,8 +11,8 @@ params { // Genome references genome_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' genes_gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' - blacklist = 'data/test.blacklist' - rename_contigs = 'assets/R64-1-1_ensembl2UCSC.txt' + blacklist = '../data/test.blacklist' + rename_contigs = '${projectDir}/assets/R64-1-1_ensembl2UCSC.txt' max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources @@ -22,7 +22,7 @@ params { publish_dir_mode = "symlink" // CCBR shared resource paths - index_dir = "data" + index_dir = "../data" fastq_screen = null sicer.species = "sacCer1" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py From 778cdf3815d9833b9aecd353d681722b5ebe4160 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 15:39:57 -0400 Subject: [PATCH 22/26] fix: need double quotes for strings w/ nf variables --- tests/cli/ci_stub.config | 2 +- tests/cli/ci_test.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli/ci_stub.config b/tests/cli/ci_stub.config index baaf26ee..317a28aa 100644 --- a/tests/cli/ci_stub.config +++ b/tests/cli/ci_stub.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = "${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'test' max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources diff --git a/tests/cli/ci_test.config b/tests/cli/ci_test.config index ef47eaff..df6d9378 100644 --- a/tests/cli/ci_test.config +++ b/tests/cli/ci_test.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = "${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'custom_genome' read_length = 50 From 10f4e4442a698fef0cf1b947ebddb0865056c044 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 22:39:29 -0400 Subject: [PATCH 23/26] ci: override max cpus on CLI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48d48bf0..0712d734 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,7 +44,7 @@ jobs: cd tests/cli which champagne champagne init - champagne run -stub -c ci_stub.config + champagne run -stub -c ci_stub.config --max_cpus 2 - name: Test run if: ${{ env.test_run == 'true' }} run: | From cb713db22575d8a4774340ffc6706afca755efbd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 22:55:12 -0400 Subject: [PATCH 24/26] ci: override max memory --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0712d734..72e4e77c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,7 +44,7 @@ jobs: cd tests/cli which champagne champagne init - champagne run -stub -c ci_stub.config --max_cpus 2 + champagne run -stub -c ci_stub.config --max_cpus 2 --max_memory 6.GB - name: Test run if: ${{ env.test_run == 'true' }} run: | From 35c8bf2ae988d63bb10e6eb3be34af71ffc2c8a3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 23:10:00 -0400 Subject: [PATCH 25/26] fix: use bam.baseName for output files --- modules/CCBR/samtools/flagstat/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/CCBR/samtools/flagstat/main.nf b/modules/CCBR/samtools/flagstat/main.nf index 98f8e5df..02336a36 100644 --- a/modules/CCBR/samtools/flagstat/main.nf +++ b/modules/CCBR/samtools/flagstat/main.nf @@ -34,7 +34,7 @@ process SAMTOOLS_FLAGSTAT { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${bam.baseName}" """ touch ${prefix}.flagstat From 99da78ea7b0d0dd796bb3542170024382fb7ddca Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 1 Nov 2023 23:10:11 -0400 Subject: [PATCH 26/26] fix: stub output filename --- modules/local/qc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 12978425..6a4ac89f 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -313,7 +313,7 @@ process QC_TABLE { stub: """ - touch qc_table.txt + touch qc_table.tsv """ }