From f125fb930bacc9f7b23c35a83f65ed39725e5cc1 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 11:58:14 -0400
Subject: [PATCH 01/26] fix: don't jumble sample IDs in QC stats; add read
 counts b/e each step

---
 assets/multiqc_config.yaml                    | 12 +++--
 bin/createtable.py                            |  6 +--
 bin/filterMetrics.py                          |  2 +
 main.nf                                       |  4 +-
 modules/CCBR/custom/countfastq/main.nf        | 25 ++++++++++
 modules/CCBR/custom/countfastq/meta.yml       | 39 ++++++++++++++++
 modules/CCBR/picard/samtofastq/main.nf        |  5 +-
 .../samtools/flagstat/main.nf                 |  2 +-
 .../samtools/flagstat/meta.yml                |  0
 modules/local/qc.nf                           | 27 +++++++----
 modules/local/samtools_index.nf               |  4 +-
 subworkflows/CCBR/filter_blacklist/main.nf    |  9 ++--
 subworkflows/CCBR/filter_blacklist/meta.yml   |  4 ++
 subworkflows/local/align.nf                   | 17 ++++---
 subworkflows/local/qc.nf                      | 46 ++++++++++++-------
 15 files changed, 152 insertions(+), 50 deletions(-)
 create mode 100644 modules/CCBR/custom/countfastq/main.nf
 create mode 100644 modules/CCBR/custom/countfastq/meta.yml
 rename modules/{nf-core => CCBR}/samtools/flagstat/main.nf (95%)
 rename modules/{nf-core => CCBR}/samtools/flagstat/meta.yml (100%)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 163ca5b4..9bc0c5d2 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -78,13 +78,19 @@ custom_data:
       NReads:
         description: "The number of reads sequenced"
         format: "{:,.0f}"
-        hidden: True
+      N_reads_surviving_blacklist:
+        description: "The number of reads surviving after filtering blacklisted regions"
+        format: "{:,.0f}"
+        hidden: true
       NMappedReads:
         description: "The number of reads mapped"
         format: "{:,.0f}"
-        hidden: True
+      N_mapped_reads_surviving_filter:
+        description: "The number of mapped reads surviving after filtering by alignment quality"
+        format: "{:,.0f}"
+        hidden: true
       NUniqMappedReads:
-        description: "The number of reads remaining after deduplication"
+        description: "The number of mapped & filtered reads remaining after deduplication"
         format: "{:,.0f}"
       NRF:
         description: "Non-Redundant fraction"
diff --git a/bin/createtable.py b/bin/createtable.py
index e0d730da..a7aab17c 100755
--- a/bin/createtable.py
+++ b/bin/createtable.py
@@ -33,10 +33,6 @@ def file2table():
     df = pd.DataFrame(tabledict)
     df.index.name = "SampleName"
     df.reset_index(inplace=True)
-    # print(df[['NSC', 'FRiP', 'PCB1', 'PCB2', 'RSC']])  #re-order columns
-    # cols = df.columns.tolist() # view df columns names
-    # orderedcols = ordercolumns(cols)
-    # print(df.to_string())
 
     # sometimes preseq fails, resulting in some columns not being present.
     # so this only keeps columns that exist in the dict.
@@ -46,7 +42,9 @@ def file2table():
         for col in [
             "SampleName",
             "NReads",
+            "N_reads_surviving_blacklist",
             "NMappedReads",
+            "N_mapped_reads_surviving_filter",
             "NUniqMappedReads",
             "NRF",
             "PBC1",
diff --git a/bin/filterMetrics.py b/bin/filterMetrics.py
index 5007132f..38a7fe34 100755
--- a/bin/filterMetrics.py
+++ b/bin/filterMetrics.py
@@ -64,6 +64,8 @@ def getmetadata(type):
         metadata = "NReads"
     elif type == "mnreads":
         metadata = "NMappedReads"
+    elif type == "N_mapped_reads_surviving_filter":
+        metadata = type
     elif type == "unreads":
         metadata = "NUniqMappedReads"
     elif type == "fragLen":
diff --git a/main.nf b/main.nf
index 9ebead8c..a9812535 100644
--- a/main.nf
+++ b/main.nf
@@ -64,8 +64,8 @@ workflow {
 
     ch_multiqc = Channel.of()
     if (params.run.qc) {
-        QC(raw_fastqs, trimmed_fastqs,
-           aligned_bam, ALIGN_GENOME.out.flagstat,
+        QC(raw_fastqs, trimmed_fastqs, FILTER_BLACKLIST.out.n_surviving_reads,
+           aligned_bam, ALIGN_GENOME.out.aligned_flagstat, ALIGN_GENOME.out.filtered_flagstat,
            deduped_bam, DEDUPLICATE.out.flagstat,
            PHANTOM_PEAKS.out.spp, frag_lengths,
            PREPARE_GENOME.out.gene_info,
diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf
new file mode 100644
index 00000000..e029e4ac
--- /dev/null
+++ b/modules/CCBR/custom/countfastq/main.nf
@@ -0,0 +1,25 @@
+// Count the reads in one or more gzipped FASTQ files; emits [ meta, count ] with count taken from the task environment.
+process CUSTOM_COUNTFASTQ {
+    tag { meta.id }
+    label 'process_single'
+
+    container 'nciccbr/ccbr_ubuntu_base_20.04:v5'
+
+    input:
+        tuple val(meta), path(fastq)
+
+    output:
+        tuple val(meta), env(count), emit: count
+    // Quality strings may begin with '@', so grepping for "^@" over-counts; assume 4-line FASTQ records and divide the line count by 4.
+    script:
+    """
+    count=\$(( \$(zcat ${fastq} | wc -l) / 4 ))
+    echo \$count
+    """
+
+    stub:
+    """
+    count=-1
+    echo \$count
+    """
+}
diff --git a/modules/CCBR/custom/countfastq/meta.yml b/modules/CCBR/custom/countfastq/meta.yml
new file mode 100644
index 00000000..b3c0c13d
--- /dev/null
+++ b/modules/CCBR/custom/countfastq/meta.yml
@@ -0,0 +1,39 @@
+name: custom_countfastq
+description: |
+  Count reads in a fastq file
+
+keywords:
+  - fastq
+tools:
+  - grep:
+    description: |
+      a command-line utility for searching plain-text data sets for lines that match a regular expression
+  - wc:
+    description: |
+      a command-line utility for counting newlines, words, and bytes in plain-text data
+  - zcat:
+    description: |
+      a command-line utility for viewing compressed file content as plain text
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fastq:
+      type: file
+      description: fastq file
+      pattern: "*.{fastq.gz}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - count:
+      type: env
+      description: Number of reads in the fastq files
+authors:
+  - "@kelly-sovacool"
+maintainers:
+  - "@kelly-sovacool"
diff --git a/modules/CCBR/picard/samtofastq/main.nf b/modules/CCBR/picard/samtofastq/main.nf
index 644f1e4d..13c1f01b 100644
--- a/modules/CCBR/picard/samtofastq/main.nf
+++ b/modules/CCBR/picard/samtofastq/main.nf
@@ -8,9 +8,10 @@ process PICARD_SAMTOFASTQ {
     tuple val(meta), path(bam)
 
     output:
-    tuple val(meta), path("*_?.fastq.gz"),       emit: reads
-    path "versions.yml",                         emit: versions
+    tuple val(meta), path("*.fastq.gz"),         emit: reads
+    tuple val(meta), path("*_?.fastq.gz"),       emit: paired, optional: true
     tuple val(meta), path("*unpaired.fastq.gz"), emit: unpaired, optional: true
+    path "versions.yml",                         emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/CCBR/samtools/flagstat/main.nf
similarity index 95%
rename from modules/nf-core/samtools/flagstat/main.nf
rename to modules/CCBR/samtools/flagstat/main.nf
index b75707ec..98f8e5df 100644
--- a/modules/nf-core/samtools/flagstat/main.nf
+++ b/modules/CCBR/samtools/flagstat/main.nf
@@ -19,7 +19,7 @@ process SAMTOOLS_FLAGSTAT {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def prefix = task.ext.prefix ?: "${bam.baseName}"
     """
     samtools \\
         flagstat \\
diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/CCBR/samtools/flagstat/meta.yml
similarity index 100%
rename from modules/nf-core/samtools/flagstat/meta.yml
rename to modules/CCBR/samtools/flagstat/meta.yml
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 8ab9f39b..158782d2 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -100,7 +100,7 @@ process HANDLE_PRESEQ_ERROR {
         tuple val(meta), val(log)
 
     output:
-        path("*nrf.txt"), emit: nrf
+        tuple val(meta), path("*nrf.txt"), emit: nrf
 
     script:
     def prefix = task.ext.prefix ?: "${meta.id}"
@@ -123,7 +123,7 @@ process PARSE_PRESEQ_LOG {
         tuple val(meta), path(log)
 
     output:
-        path("*nrf.txt"), emit: nrf
+        tuple val(meta), path("*nrf.txt"), emit: nrf
 
     script:
     def prefix = task.ext.prefix ?: "${meta.id}"
@@ -150,7 +150,7 @@ process PHANTOM_PEAKS {
 
     output:
         path("${meta.id}.ppqt.pdf")                    , emit: pdf
-        path("${meta.id}.spp.out")                     , emit: spp
+        tuple val(meta), path("${meta.id}.spp.out")    , emit: spp
         tuple val(meta), path("${meta.id}.fraglen.txt"), emit: fraglen
         path  "versions.yml"                           , emit: versions
 
@@ -251,12 +251,7 @@ process QC_STATS {
     container = "${params.containers.base}"
 
     input:
-        tuple val(meta), path(raw_fastq)
-        tuple val(meta), path(align_flagstat)
-        tuple path(dedup_flagstat), path(idxstat)
-        path(preseq_nrf)
-        path(ppqt_spp)
-        tuple val(meta), val(fraglen)
+        tuple val(meta), path(raw_fastq), val(n_reads_after_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen)
 
 
     output:
@@ -267,16 +262,28 @@ process QC_STATS {
     def outfile = "${meta.id}.qc_stats.txt"
     """
     touch ${outfile}
+
     # Number of reads
     zcat ${raw_fastq} | wc -l | filterMetrics.py ${meta.id} tnreads >> ${outfile}
+
+    # Number of reads after blacklist filter
+    echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t${n_reads_after_blacklist}" >> ${outfile}
+
     # Number of mapped reads
-    grep 'mapped (' ${align_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile}
+    grep 'mapped (' ${aligned_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile}
+
+    # Number of mapped reads surviving filter
+    grep 'mapped (' ${filtered_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} N_mapped_reads_surviving_filter >> ${outfile}
+
     # Number of uniquely mapped reads
     grep 'mapped (' ${dedup_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} unreads >> ${outfile}
+
     # NRF, PCB1, PCB2
     cat ${preseq_nrf} | filterMetrics.py ${meta.id} nrf >> ${outfile}
+
     # NSC, RSC, Qtag
     awk '{{print \$(NF-2),\$(NF-1),\$NF}}' ${ppqt_spp} | filterMetrics.py ${meta.id} ppqt >> ${outfile}
+
     # Fragment Length
     echo "${meta.id}\tFragmentLength\t${fraglen}" >> ${outfile}
     """
diff --git a/modules/local/samtools_index.nf b/modules/local/samtools_index.nf
index 997041d3..f64f73cd 100644
--- a/modules/local/samtools_index.nf
+++ b/modules/local/samtools_index.nf
@@ -1,4 +1,4 @@
-process SAMTOOLS_INDEX {
+process SAMTOOLS_INDEX { // TODO create/use flagstat & idxstat module in nf-modules
     tag { meta.id }
     label 'process_medium'
 
@@ -9,7 +9,7 @@ process SAMTOOLS_INDEX {
 
     output:
         tuple val(meta), path("${bam.baseName}.sort.bam"), path("${bam.baseName}.sort.bam.bai"), emit: bam
-        tuple path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat
+        tuple val(meta), path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat
 
     script:
     """
diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf
index 14494899..3438b260 100644
--- a/subworkflows/CCBR/filter_blacklist/main.nf
+++ b/subworkflows/CCBR/filter_blacklist/main.nf
@@ -1,8 +1,9 @@
 
 
-include { BWA_MEM                 } from '../../../modules/CCBR/bwa/mem'
+include { BWA_MEM                } from '../../../modules/CCBR/bwa/mem'
 include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned'
-include { PICARD_SAMTOFASTQ       } from '../../../modules/CCBR/picard/samtofastq'
+include { PICARD_SAMTOFASTQ      } from '../../../modules/CCBR/picard/samtofastq'
+include { CUSTOM_COUNTFASTQ      } from '../../../modules/CCBR/custom/countfastq'
 
 workflow FILTER_BLACKLIST {
     take:
@@ -15,6 +16,7 @@ workflow FILTER_BLACKLIST {
         BWA_MEM ( ch_fastq_input, ch_blacklist_index )
         SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam )
         PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam )
+        CUSTOM_COUNTFASTQ( PICARD_SAMTOFASTQ.out.paired )
 
         ch_versions = ch_versions.mix(
             BWA_MEM.out.versions,
@@ -23,6 +25,7 @@ workflow FILTER_BLACKLIST {
         )
 
     emit:
-        reads =  PICARD_SAMTOFASTQ.out.reads  // channel: [ val(meta), path(fastq) ]
+        reads =  PICARD_SAMTOFASTQ.out.paired  // channel: [ val(meta), path(fastq) ]
+        n_surviving_reads = CUSTOM_COUNTFASTQ.out.count
         versions = ch_versions           // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/CCBR/filter_blacklist/meta.yml b/subworkflows/CCBR/filter_blacklist/meta.yml
index 7eeb732c..df1797d1 100644
--- a/subworkflows/CCBR/filter_blacklist/meta.yml
+++ b/subworkflows/CCBR/filter_blacklist/meta.yml
@@ -11,15 +11,19 @@ components:
   - bwa/mem
   - samtools/filteraligned
   - picard/samtofastq
+  - custom/countfastq
 input:
   - ch_fastq_input:
+      type: map
       description: |
         A channel containing fastq files
   - ch_blacklist_index:
+      type: file
       description: |
         A BWA index created by running BWA/INDEX on a fasta file of blacklisted regions/
 output:
   - reads:
+      type: map
       description: |
         Reads from the fastq files that do not align to the blacklist
   - versions:
diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
index 2e446ade..e0a0385c 100644
--- a/subworkflows/local/align.nf
+++ b/subworkflows/local/align.nf
@@ -1,7 +1,8 @@
 include { BWA_MEM           } from "../../modules/CCBR/bwa/mem"
 include { FILTER_QUALITY    } from "../../modules/local/align.nf"
-include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main'
-include { SAMTOOLS_SORT     } from '../../modules/CCBR/samtools/sort/main' // TODO use ccbr samtools/sort
+include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_ALIGN
+          SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_FILTER } from '../../modules/CCBR/samtools/flagstat'
+include { SAMTOOLS_SORT     } from '../../modules/CCBR/samtools/sort'
 
 workflow ALIGN_GENOME {
 
@@ -11,17 +12,19 @@ workflow ALIGN_GENOME {
 
     main:
         BWA_MEM(reads, reference)
+        SAMTOOLS_FLAGSTAT_ALIGN( BWA_MEM.out.bam )
         FILTER_QUALITY( BWA_MEM.out.bam )
         SAMTOOLS_SORT( FILTER_QUALITY.out.bam )
-        SAMTOOLS_FLAGSTAT( SAMTOOLS_SORT.out.bam )
+        SAMTOOLS_FLAGSTAT_FILTER( SAMTOOLS_SORT.out.bam )
 
         ch_versions = Channel.empty().mix(
             BWA_MEM.out.versions,
-            SAMTOOLS_FLAGSTAT.out.versions
+            SAMTOOLS_FLAGSTAT_ALIGN.out.versions
         )
 
     emit:
-        bam = FILTER_QUALITY.out.bam
-        flagstat = SAMTOOLS_FLAGSTAT.out.flagstat
-        versions = ch_versions
+        bam               = FILTER_QUALITY.out.bam
+        aligned_flagstat  = SAMTOOLS_FLAGSTAT_ALIGN.out.flagstat
+        filtered_flagstat = SAMTOOLS_FLAGSTAT_FILTER.out.flagstat
+        versions          = ch_versions
 }
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index 2a377e35..4cf958d3 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -1,4 +1,3 @@
-
 include { FASTQC as FASTQC_RAW     } from "../../modules/local/qc.nf"
 include { FASTQC as FASTQC_TRIMMED } from "../../modules/local/qc.nf"
 include { FASTQ_SCREEN             } from "../../modules/local/qc.nf"
@@ -23,8 +22,10 @@ workflow QC {
     take:
         raw_fastqs
         trimmed_fastqs
-        aligned_bam
+        n_reads_surviving_blacklist
+        aligned_filtered_bam
         aligned_flagstat
+        filtered_flagstat
         deduped_bam
         deduped_flagstat
         ppqt_spp
@@ -43,10 +44,10 @@ workflow QC {
                                         type: 'dir', checkIfExists: true)) | FASTQ_SCREEN
             ch_multiqc = ch_multiqc.mix(FASTQ_SCREEN.out.screen)
         }
-        PRESEQ(aligned_bam)
+        PRESEQ(aligned_filtered_bam)
         // when preseq fails, write NAs for the stats that are calculated from its log
         PRESEQ.out.log
-            .join(aligned_bam, remainder: true)
+            .join(aligned_filtered_bam, remainder: true)
             .branch { meta, preseq_log, bam_tuple ->
             failed: preseq_log == null
                 return (tuple(meta, "nopresqlog"))
@@ -59,15 +60,17 @@ workflow QC {
             .concat(HANDLE_PRESEQ_ERROR.out.nrf)
             .set{ preseq_nrf }
 
-        QC_STATS(
-            raw_fastqs,
-            aligned_flagstat,
-            deduped_flagstat,
-            preseq_nrf,
-            ppqt_spp,
-            frag_lengths
-        )
-        QC_TABLE(QC_STATS.out.collect())
+        // Order of items across channels is not guaranteed, so join every QC_STATS input on its meta key into a single channel that keeps each sample's files together.
+        qc_stats_input = raw_fastqs
+            .join(n_reads_surviving_blacklist)
+            .join(aligned_flagstat)
+            .join(filtered_flagstat)
+            .join(deduped_flagstat)
+            .join(preseq_nrf)
+            .join(ppqt_spp)
+            .join(frag_lengths)
+        QC_STATS( qc_stats_input )
+        QC_TABLE( QC_STATS.out.collect() )
 
         // Deeptools
 
@@ -105,12 +108,23 @@ workflow QC {
             }
             .set { ch_ip_ctrl_bigwig }
 
+        deduped_flagstat
+            .map { meta, flagstat, idxstat ->
+                [ flagstat, idxstat ]
+            }
+            .set{ dedup_flagstat_files }
+        ppqt_spp
+            .map { meta, spp ->
+                [ spp ]
+            }
+            .set{ ppqt_spp_files }
+
         ch_multiqc = ch_multiqc.mix(
             FASTQC_RAW.out.zip,
             FASTQC_TRIMMED.out.zip,
-            deduped_flagstat,
-            ppqt_spp,
-            QC_TABLE.out,
+            dedup_flagstat_files,
+            ppqt_spp_files,
+            QC_TABLE.out.txt,
             PLOT_FINGERPRINT.out.matrix,
             PLOT_FINGERPRINT.out.metrics,
             PLOT_CORRELATION.out.tab,

From 00ddb89f405c0320502d99cb219496b96684d40e Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 12:04:48 -0400
Subject: [PATCH 02/26] chore: prepare v0.2.1

---
 CHANGELOG.md | 5 +++++
 VERSION      | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ecdad8aa..2f02d3ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## CHAMPAGNE 0.2.1
+
+- Fixed a bug in QC stats that mixed up the statistics for different samples. (#125)
+- Report read counts between blacklist & filtering steps in the QC table. (#125)
+
 ## CHAMPAGNE 0.2.0
 
 ### New features
diff --git a/VERSION b/VERSION
index 70426f85..0c62199f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.2.0-dev
+0.2.1

From 578c0f56dba1c4221715e0b5c013b7aff7a4b26f Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 13:35:52 -0400
Subject: [PATCH 03/26] feat: option to skip deeptools QC subworkflow

---
 nextflow.config                 |  1 +
 subworkflows/local/deeptools.nf | 60 ++++++++++++++++++++++++++
 subworkflows/local/qc.nf        | 74 ++++++++++-----------------------
 3 files changed, 82 insertions(+), 53 deletions(-)
 create mode 100644 subworkflows/local/deeptools.nf

diff --git a/nextflow.config b/nextflow.config
index 7c74fe0c..f477f9dd 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -67,6 +67,7 @@ params {
 
     run { // some steps can be turned on/off for debugging purposes
         qc = true
+        deeptools = true
         normalize_input = true
         call_peaks = true
         gem = true
diff --git a/subworkflows/local/deeptools.nf b/subworkflows/local/deeptools.nf
new file mode 100644
index 00000000..d01fcd8a
--- /dev/null
+++ b/subworkflows/local/deeptools.nf
@@ -0,0 +1,60 @@
+
+include { BAM_COVERAGE             } from "../../modules/local/deeptools.nf"
+include { BIGWIG_SUM               } from "../../modules/local/deeptools.nf"
+include { BED_PROTEIN_CODING       } from "../../modules/local/deeptools.nf"
+include { COMPUTE_MATRIX           } from "../../modules/local/deeptools.nf"
+include { PLOT_FINGERPRINT         } from "../../modules/local/deeptools.nf"
+include { PLOT_CORRELATION         } from "../../modules/local/deeptools.nf"
+include { PLOT_PCA                 } from "../../modules/local/deeptools.nf"
+include { PLOT_HEATMAP             } from "../../modules/local/deeptools.nf"
+include { PLOT_PROFILE             } from "../../modules/local/deeptools.nf"
+
+workflow DEEPTOOLS {
+    take:
+        deduped_bam
+        frag_lengths
+        effective_genome_size
+
+    main:
+
+        deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE
+        BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list }
+        BIGWIG_SUM(bigwig_list)
+        BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION
+        BIGWIG_SUM.out.array | PLOT_PCA
+
+        // Create channel: [ meta, [ ip_bam, control_bam ], [ ip_bai, control_bai ] ]
+        deduped_bam
+            .combine(deduped_bam)
+            .map {
+                meta1, bam1, bai1, meta2, bam2, bai2 ->
+                    meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null
+            }
+            .set { ch_ip_ctrl_bam_bai }
+        ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT
+        gene_info | BED_PROTEIN_CODING
+        COMPUTE_MATRIX(bigwig_list,
+                       BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS'))
+        )
+        PLOT_HEATMAP(COMPUTE_MATRIX.out.mat)
+        PLOT_PROFILE(COMPUTE_MATRIX.out.mat)
+
+        // Create channel: [ meta, ip_bw, control_bw ]
+        BAM_COVERAGE.out.meta
+            .merge(BAM_COVERAGE.out.bigwig)
+            .set { bigwigs }
+        bigwigs
+            .combine(bigwigs)
+            .map {
+                meta1, bw1, meta2, bw2 ->
+                    meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null
+            }
+            .set { ch_ip_ctrl_bigwig }
+    emit:
+        bigwig              = ch_ip_ctrl_bigwig
+        fingerprint_matrix  = PLOT_FINGERPRINT.out.matrix,
+        fingerprint_metrics = PLOT_FINGERPRINT.out.metrics,
+        corr                = PLOT_CORRELATION.out.tab,
+        pca                 = PLOT_PCA.out.tab,
+        profile             = PLOT_PROFILE.out.tab
+}
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index 4cf958d3..383f6f3f 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -1,3 +1,4 @@
+// modules
 include { FASTQC as FASTQC_RAW     } from "../../modules/local/qc.nf"
 include { FASTQC as FASTQC_TRIMMED } from "../../modules/local/qc.nf"
 include { FASTQ_SCREEN             } from "../../modules/local/qc.nf"
@@ -8,15 +9,8 @@ include { QC_STATS                 } from "../../modules/local/qc.nf"
 include { QC_TABLE                 } from "../../modules/local/qc.nf"
 include { MULTIQC                  } from "../../modules/local/qc.nf"
 
-include { BAM_COVERAGE             } from "../../modules/local/deeptools.nf"
-include { BIGWIG_SUM               } from "../../modules/local/deeptools.nf"
-include { BED_PROTEIN_CODING       } from "../../modules/local/deeptools.nf"
-include { COMPUTE_MATRIX           } from "../../modules/local/deeptools.nf"
-include { PLOT_FINGERPRINT         } from "../../modules/local/deeptools.nf"
-include { PLOT_CORRELATION         } from "../../modules/local/deeptools.nf"
-include { PLOT_PCA                 } from "../../modules/local/deeptools.nf"
-include { PLOT_HEATMAP             } from "../../modules/local/deeptools.nf"
-include { PLOT_PROFILE             } from "../../modules/local/deeptools.nf"
+// subworkflows
+include { DEEPTOOLS                } from "../../subworkflows/local/deeptools.nf"
 
 workflow QC {
     take:
@@ -72,42 +66,6 @@ workflow QC {
         QC_STATS( qc_stats_input )
         QC_TABLE( QC_STATS.out.collect() )
 
-        // Deeptools
-
-        deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE
-        BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list }
-        BIGWIG_SUM(bigwig_list)
-        BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION
-        BIGWIG_SUM.out.array | PLOT_PCA
-
-        // Create channel: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ]
-        deduped_bam
-            .combine(deduped_bam)
-            .map {
-                meta1, bam1, bai1, meta2, bam2, bai2 ->
-                    meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null
-            }
-            .set { ch_ip_ctrl_bam_bai }
-        ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT
-        gene_info | BED_PROTEIN_CODING
-        COMPUTE_MATRIX(bigwig_list,
-                       BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS'))
-        )
-        PLOT_HEATMAP(COMPUTE_MATRIX.out.mat)
-        PLOT_PROFILE(COMPUTE_MATRIX.out.mat)
-
-        // Create channel: [ meta, ip_bw, control_bw ]
-        BAM_COVERAGE.out.meta
-            .merge(BAM_COVERAGE.out.bigwig)
-            .set { bigwigs }
-        bigwigs
-            .combine(bigwigs)
-            .map {
-                meta1, bw1, meta2, bw2 ->
-                    meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null
-            }
-            .set { ch_ip_ctrl_bigwig }
-
         deduped_flagstat
             .map { meta, flagstat, idxstat ->
                 [ flagstat, idxstat ]
@@ -118,22 +76,32 @@ workflow QC {
                 [ spp ]
             }
             .set{ ppqt_spp_files }
-
         ch_multiqc = ch_multiqc.mix(
             FASTQC_RAW.out.zip,
             FASTQC_TRIMMED.out.zip,
             dedup_flagstat_files,
             ppqt_spp_files,
-            QC_TABLE.out.txt,
-            PLOT_FINGERPRINT.out.matrix,
-            PLOT_FINGERPRINT.out.metrics,
-            PLOT_CORRELATION.out.tab,
-            PLOT_PCA.out.tab,
-            PLOT_PROFILE.out.tab
+            QC_TABLE.out.txt
         )
 
+        ch_ip_ctrl_bigwig = Channel.empty()
+        if (params.run.deeptools) {
+            DEEPTOOLS( deduped_bam,
+                       frag_lengths,
+                       effective_genome_size
+                     )
+            ch_ip_ctrl_bigwig = DEEPTOOLS.out.bigwig
+            ch_multiqc = ch_multiqc.mix(
+                DEEPTOOLS.out.fingerprint_matrix,
+                DEEPTOOLS.out.fingerprint_metrics,
+                DEEPTOOLS.out.corr,
+                DEEPTOOLS.out.pca,
+                DEEPTOOLS.out.profile
+            )
+        }
+
     emit:
-        bigwigs = ch_ip_ctrl_bigwig
+        bigwigs       = ch_ip_ctrl_bigwig
         multiqc_input = ch_multiqc
 
 }

From 194d7005397600a96c83bfce80dc37cb07cda86d Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 19:09:12 -0400
Subject: [PATCH 04/26] fix: move withName selectors to modules.config

---
 conf/base.config    |  5 -----
 conf/modules.config | 21 +++++++++++----------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 6c7a4a17..b99078a6 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,7 +10,6 @@
 
 process {
 
-    // TODO nf-core: Check the defaults for all processes
     cpus   = { check_max( 1    * task.attempt, 'cpus'   ) }
     memory = { check_max( 6.GB * task.attempt, 'memory' ) }
     time   = { check_max( 4.h  * task.attempt, 'time'   ) }
@@ -59,10 +58,6 @@ process {
         errorStrategy = 'retry'
         maxRetries    = 2
     }
-    /*
-    withName:CUSTOM_DUMPSOFTWAREVERSIONS {
-        cache = false
-    }*/
 
     // Custom CCBR resource requirements
     withLabel:process_higher {
diff --git a/conf/modules.config b/conf/modules.config
index 2496842b..536a55f6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -8,20 +8,21 @@ process {
 
     errorStrategy = 'finish'
 
-    withName: 'INPUT_CHECK:SAMPLESHEET_CHECK' {
+    /*
+    withName: '.*CUSTOM_DUMPSOFTWAREVERSIONS' {
+        cache = false
+        publishDir = [
+            path: { "${params.outdir}/pipeline_info" },
+            mode: params.publish_dir_mode,
+            pattern: '*_versions.yml'
+        ]
+    }*/
+
+    withName: '.*INPUT_CHECK:SAMPLESHEET_CHECK' {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
-    withName: 'CUTADAPT' {
-        ext.args = [
-            '--nextseq-trim=2',
-            '--trim-n -n 5 -O 5',
-            '-q 10,10',
-            '-m 20',
-            '-b file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa'
-        ].join(' ').trim()
-    }
 }

From 7685049160fccea4cf9675456011b754eb258542 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 19:19:58 -0400
Subject: [PATCH 05/26] fix: deeptools subwf syntax

---
 conf/test.config                |  1 +
 main.nf                         |  4 ++++
 subworkflows/local/deeptools.nf | 10 ++++++----
 subworkflows/local/qc.nf        |  3 ++-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 8a1d203c..f1d813c0 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -18,6 +18,7 @@ params {
     deeptools.excluded_chroms = 'chrM'
     run {
         qc = true
+        deeptools = true
         normalize_input = true
         call_peaks = true
         gem = true
diff --git a/main.nf b/main.nf
index a9812535..4c3d1be1 100644
--- a/main.nf
+++ b/main.nf
@@ -41,6 +41,10 @@ workflow MAKE_REFERENCE {
 
 // MAIN WORKFLOW
 workflow {
+    CHIPSEQ()
+}
+
+workflow CHIPSEQ {
     INPUT_CHECK(file(params.input), params.seq_center)
     INPUT_CHECK.out.reads.set { raw_fastqs }
     raw_fastqs | CUTADAPT
diff --git a/subworkflows/local/deeptools.nf b/subworkflows/local/deeptools.nf
index d01fcd8a..da601911 100644
--- a/subworkflows/local/deeptools.nf
+++ b/subworkflows/local/deeptools.nf
@@ -14,6 +14,7 @@ workflow DEEPTOOLS {
         deduped_bam
         frag_lengths
         effective_genome_size
+        gene_info
 
     main:
 
@@ -50,11 +51,12 @@ workflow DEEPTOOLS {
                     meta1.control == meta2.id ? [ meta1, bw1, bw2 ] : null
             }
             .set { ch_ip_ctrl_bigwig }
+
     emit:
         bigwig              = ch_ip_ctrl_bigwig
-        fingerprint_matrix  = PLOT_FINGERPRINT.out.matrix,
-        fingerprint_metrics = PLOT_FINGERPRINT.out.metrics,
-        corr                = PLOT_CORRELATION.out.tab,
-        pca                 = PLOT_PCA.out.tab,
+        fingerprint_matrix  = PLOT_FINGERPRINT.out.matrix
+        fingerprint_metrics = PLOT_FINGERPRINT.out.metrics
+        corr                = PLOT_CORRELATION.out.tab
+        pca                 = PLOT_PCA.out.tab
         profile             = PLOT_PROFILE.out.tab
 }
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index 383f6f3f..3479ded9 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -88,7 +88,8 @@ workflow QC {
         if (params.run.deeptools) {
             DEEPTOOLS( deduped_bam,
                        frag_lengths,
-                       effective_genome_size
+                       effective_genome_size,
+                       gene_info
                      )
             ch_ip_ctrl_bigwig = DEEPTOOLS.out.bigwig
             ch_multiqc = ch_multiqc.mix(

From 78249174ce2c07126fc7db464c4835097eedc3e7 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 19:33:15 -0400
Subject: [PATCH 06/26] test: move cli-specific test to separate subdir

---
 .github/workflows/build.yml                | 8 ++++----
 nextflow.config                            | 6 ------
 tests/.gitignore                           | 7 -------
 tests/cli/.gitignore                       | 4 ++++
 {conf => tests/cli}/ci_stub.config         | 0
 {conf => tests/cli}/ci_test.config         | 0
 {assets => tests/cli}/fastq_screen_ci.conf | 0
 7 files changed, 8 insertions(+), 17 deletions(-)
 delete mode 100644 tests/.gitignore
 create mode 100644 tests/cli/.gitignore
 rename {conf => tests/cli}/ci_stub.config (100%)
 rename {conf => tests/cli}/ci_test.config (100%)
 rename {assets => tests/cli}/fastq_screen_ci.conf (100%)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index de1e513e..4314b5ab 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -41,16 +41,16 @@ jobs:
           pip install .[dev,test]
       - name: Stub run
         run: |
-          cd tests/
+          cd tests/cli
           which champagne
           champagne init
-          champagne run -profile ci_stub -stub
+          champagne run -stub -c ci_stub
       - name: Test run
         if: ${{ env.test_run == 'true' }}
         run: |
-          cd tests/
+          cd tests/cli
           champagne init
-          champagne run -profile ci_test,docker
+          champagne run -profile docker -c ci_test.config
       - name: "Upload Artifact"
         uses: actions/upload-artifact@v3
         if: always() # run even if previous steps fail
diff --git a/nextflow.config b/nextflow.config
index f477f9dd..34f9a3c2 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -113,12 +113,6 @@ profiles {
     full_mm10 {
         includeConfig "conf/full_mm10.config"
     }
-    ci_stub {
-        includeConfig "conf/ci_stub.config"
-    }
-    ci_test {
-        includeConfig "conf/ci_test.config"
-    }
 }
 
 includeConfig 'conf/genomes.config'
diff --git a/tests/.gitignore b/tests/.gitignore
deleted file mode 100644
index 29db90be..00000000
--- a/tests/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-assets/
-conf/
-results/
-work/
-
-nextflow.config
-submit_slurm.sh
diff --git a/tests/cli/.gitignore b/tests/cli/.gitignore
new file mode 100644
index 00000000..810a465d
--- /dev/null
+++ b/tests/cli/.gitignore
@@ -0,0 +1,4 @@
+/assets/
+/conf/
+/nextflow.config
+/submit_slurm.sh
diff --git a/conf/ci_stub.config b/tests/cli/ci_stub.config
similarity index 100%
rename from conf/ci_stub.config
rename to tests/cli/ci_stub.config
diff --git a/conf/ci_test.config b/tests/cli/ci_test.config
similarity index 100%
rename from conf/ci_test.config
rename to tests/cli/ci_test.config
diff --git a/assets/fastq_screen_ci.conf b/tests/cli/fastq_screen_ci.conf
similarity index 100%
rename from assets/fastq_screen_ci.conf
rename to tests/cli/fastq_screen_ci.conf

From 59d52bcd9dcca089449d084720507d6d02934904 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Fri, 27 Oct 2023 21:43:12 -0400
Subject: [PATCH 07/26] fix: ignore output dir

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 1905fa74..b504e6de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ replay_pid*
 /work*/
 /data/
 /results/
+/output/
 /params.yaml
 
 # python packaging

From 30e28201a829a0eac39f0f730c0e63ea66dc9239 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Mon, 30 Oct 2023 15:59:49 -0400
Subject: [PATCH 08/26] style: sort df rows by sample name

---
 bin/createtable.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/createtable.py b/bin/createtable.py
index a7aab17c..b14314d1 100755
--- a/bin/createtable.py
+++ b/bin/createtable.py
@@ -33,6 +33,7 @@ def file2table():
     df = pd.DataFrame(tabledict)
     df.index.name = "SampleName"
     df.reset_index(inplace=True)
+    df = df.sort_values(by="SampleName")
 
     # sometimes preseq fails, resulting in some columns not being present.
     # so this only keeps columns that exist in the dict.

From 3b4be6a0ff4809870fa78417cf61205105ef00bd Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Mon, 30 Oct 2023 16:01:41 -0400
Subject: [PATCH 09/26] fix: increase default resources across process
 selectors

---
 conf/base.config             | 17 +++++------------
 modules/CCBR/bwa/mem/main.nf |  2 +-
 modules/local/deduplicate.nf |  2 +-
 modules/local/deeptools.nf   |  8 ++++----
 modules/local/qc.nf          |  3 +--
 5 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index b99078a6..c5251257 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -31,19 +31,19 @@ process {
         time   = { check_max( 4.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_low {
-        cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 4     * task.attempt, 'cpus'    ) }
         memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 4.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_medium {
-        cpus   = { check_max( 6     * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 16    * task.attempt, 'cpus'    ) }
         memory = { check_max( 36.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 8.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 72.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
+        cpus   = { check_max( 32     * task.attempt, 'cpus'    ) }
+        memory = { check_max( 120.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 16.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_long {
         time   = { check_max( 20.h  * task.attempt, 'time'    ) }
@@ -58,11 +58,4 @@ process {
         errorStrategy = 'retry'
         maxRetries    = 2
     }
-
-    // Custom CCBR resource requirements
-    withLabel:process_higher {
-        cpus   = { check_max( 32    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 120.GB * task.attempt, 'memory'  ) }
-        time   = { check_max( 16.h  * task.attempt, 'time'    ) }
-    }
 }
diff --git a/modules/CCBR/bwa/mem/main.nf b/modules/CCBR/bwa/mem/main.nf
index fe70049f..e9fa59f8 100644
--- a/modules/CCBR/bwa/mem/main.nf
+++ b/modules/CCBR/bwa/mem/main.nf
@@ -1,6 +1,6 @@
 process BWA_MEM {
     tag { meta.id }
-    label 'process_higher'
+    label 'process_high'
 
     container 'nciccbr/ccbr_ubuntu_base_20.04:v5'
 
diff --git a/modules/local/deduplicate.nf b/modules/local/deduplicate.nf
index 7d574a7a..5153ff19 100644
--- a/modules/local/deduplicate.nf
+++ b/modules/local/deduplicate.nf
@@ -44,7 +44,7 @@ process MACS2_DEDUP {
 process PICARD_DEDUP {
     tag { meta.id }
     label 'dedup'
-    label 'process_higher'
+    label 'process_high'
 
     container "${params.containers.picard}"
 
diff --git a/modules/local/deeptools.nf b/modules/local/deeptools.nf
index 42bd5a48..3a5db905 100644
--- a/modules/local/deeptools.nf
+++ b/modules/local/deeptools.nf
@@ -3,7 +3,7 @@ process BAM_COVERAGE {
     tag { meta.id }
     label 'qc'
     label 'deeptools'
-    label 'process_higher'
+    label 'process_high'
 
     container = "${params.containers.deeptools}"
 
@@ -125,7 +125,7 @@ process PLOT_PCA {
 process PLOT_FINGERPRINT {
   label 'qc'
   label 'deeptools'
-  label 'process_higher'
+  label 'process_high'
 
   container = "${params.containers.deeptools}"
 
@@ -187,7 +187,7 @@ process BED_PROTEIN_CODING {
 process COMPUTE_MATRIX {
   label 'qc'
   label 'deeptools'
-  label 'process_higher'
+  label 'process_high'
 
   container = "${params.containers.deeptools}"
 
@@ -303,7 +303,7 @@ process PLOT_PROFILE {
 process NORMALIZE_INPUT {
   label 'qc'
   label 'deeptools'
-  label 'process_higher'
+  label 'process_high'
 
   container = "${params.containers.deeptools}"
 
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 158782d2..5f844ca4 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -2,7 +2,7 @@
 process FASTQC {
     tag { meta.id }
     label 'qc'
-    label 'process_higher'
+    label 'process_high'
     publishDir "${params.outdir}/qc/fastqc_${fqtype}/${meta.id}", mode: "${params.publish_dir_mode}"
 
     container = "${params.containers.fastqc}"
@@ -258,7 +258,6 @@ process QC_STATS {
         path("${meta.id}.qc_stats.txt")
 
     script:
-    // TODO: handle paired reads
     def outfile = "${meta.id}.qc_stats.txt"
     """
     touch ${outfile}

From 86dcab1188154119cdde3c1b15a70b570aac9c53 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 09:37:40 -0400
Subject: [PATCH 10/26] fix: increase memory for samtools sort

---
 modules/CCBR/samtools/sort/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/CCBR/samtools/sort/main.nf b/modules/CCBR/samtools/sort/main.nf
index 359b871d..5cf9e8d5 100644
--- a/modules/CCBR/samtools/sort/main.nf
+++ b/modules/CCBR/samtools/sort/main.nf
@@ -1,6 +1,6 @@
 process SAMTOOLS_SORT {
     tag { meta.id }
-    label 'process_medium'
+    label 'process_high'
 
     container 'nciccbr/ccbr_ubuntu_base_20.04:v6'
 

From fee3d57435f524cdfdaf58cacb6230060e79f369 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 10:30:10 -0400
Subject: [PATCH 11/26] fix: correct fastq count method

---
 modules/CCBR/custom/countfastq/main.nf        | 10 ++++------
 .../countfastq/templates/count-fastq.py       | 19 +++++++++++++++++++
 modules/local/qc.nf                           |  5 +++--
 subworkflows/local/qc.nf                      |  1 -
 4 files changed, 26 insertions(+), 9 deletions(-)
 create mode 100644 modules/CCBR/custom/countfastq/templates/count-fastq.py

diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf
index e029e4ac..ddaf682b 100644
--- a/modules/CCBR/custom/countfastq/main.nf
+++ b/modules/CCBR/custom/countfastq/main.nf
@@ -3,19 +3,17 @@ process CUSTOM_COUNTFASTQ {
     tag { meta.id }
     label 'process_single'
 
-    container 'nciccbr/ccbr_ubuntu_base_20.04:v5'
+    container 'nciccbr/ccbr_ubuntu_base_20.04:v6.1'
 
     input:
         tuple val(meta), path(fastq)
 
     output:
-        tuple val(meta), env(count), emit: count
+        tuple val(meta), path("*.txt"), emit: count
 
     script:
-    """
-    count=`zcat ${fastq} | grep "^@" | wc -l`
-    echo \$count
-    """
+    def txt_filename = "${meta.baseName}.txt"
+    template 'count-fastq.py'
 
     stub:
     """
diff --git a/modules/CCBR/custom/countfastq/templates/count-fastq.py b/modules/CCBR/custom/countfastq/templates/count-fastq.py
new file mode 100644
index 00000000..d83ee8fa
--- /dev/null
+++ b/modules/CCBR/custom/countfastq/templates/count-fastq.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import Bio.SeqIO
+import gzip
+import os
+
+
+def main():
+    count = 0
+    for fastq_filename in "${fastq}".split():
+        with gzip.open(fastq_filename, "rt") as file_handle:
+            n_seqs = sum(1 for rec in Bio.SeqIO.parse(file_handle, "fastq"))
+        count += n_seqs
+    with open("${meta.id}.count.txt", "w") as out_file:
+        out_file.write(str(count))
+    return count
+
+
+if __name__ == "__main__":
+    print(main())
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 5f844ca4..576703fe 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -251,7 +251,7 @@ process QC_STATS {
     container = "${params.containers.base}"
 
     input:
-        tuple val(meta), path(raw_fastq), val(n_reads_after_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen)
+        tuple val(meta), path(raw_fastq), path(count_file_blacklist), path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen)
 
 
     output:
@@ -266,7 +266,8 @@ process QC_STATS {
     zcat ${raw_fastq} | wc -l | filterMetrics.py ${meta.id} tnreads >> ${outfile}
 
     # Number of reads after blacklist filter
-    echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t${n_reads_after_blacklist}" >> ${outfile}
+    n_reads_after_blacklist=`cat ${count_file_blacklist}`
+    echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t\${n_reads_after_blacklist}" >> ${outfile}
 
     # Number of mapped reads
     grep 'mapped (' ${aligned_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile}
diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf
index 3479ded9..75e6975c 100644
--- a/subworkflows/local/qc.nf
+++ b/subworkflows/local/qc.nf
@@ -54,7 +54,6 @@ workflow QC {
             .concat(HANDLE_PRESEQ_ERROR.out.nrf)
             .set{ preseq_nrf }
 
-        // TODO: order of items in channel is not guaranteed. Need to create single channel with all files for QC stats with same metadata
         qc_stats_input = raw_fastqs
             .join(n_reads_surviving_blacklist)
             .join(aligned_flagstat)

From e1bccec04395386a2f9f5d97d165a0792e581341 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 16:16:31 -0400
Subject: [PATCH 12/26] feat: add script to compare tables

used for comparing old vs new pipeline stats (#122)
---
 bin/compare-tables.R | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 bin/compare-tables.R

diff --git a/bin/compare-tables.R b/bin/compare-tables.R
new file mode 100644
index 00000000..d8214588
--- /dev/null
+++ b/bin/compare-tables.R
@@ -0,0 +1,15 @@
+library(tidyverse)
+
+original <- read.table("QCTable.txt", header = TRUE) %>%
+  as_tibble() %>%
+  mutate(across(contains("reads"), as.integer)) %>%
+  select(c("SampleName", contains("reads"))) %>%
+  pivot_longer(-SampleName, values_to = "value_orig")
+new <- read_tsv("qc_table.tsv") %>%
+  select(SampleName, original %>% pull(name)) %>%
+  pivot_longer(-SampleName, values_to = "value_new")
+
+
+inner_join(original, new) %>%
+  mutate(rel_diff_percent = round(100 * (value_new - value_orig) / value_orig, 2)) %>%
+  View()

From e8847bc41e64c4cab4b2da3bb6bbd117ab3e5fcd Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 20:25:40 -0400
Subject: [PATCH 13/26] fix: increase resources for samtools index

---
 modules/local/samtools_index.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/samtools_index.nf b/modules/local/samtools_index.nf
index f64f73cd..d4848313 100644
--- a/modules/local/samtools_index.nf
+++ b/modules/local/samtools_index.nf
@@ -1,6 +1,6 @@
 process SAMTOOLS_INDEX { // TODO create/use flagstat & idxstat module in nf-modules
     tag { meta.id }
-    label 'process_medium'
+    label 'process_high'
 
     container = "${params.containers.base}"
 

From 8f117e69a66cbabbb8bb83e2929be7930d1024d2 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 20:26:02 -0400
Subject: [PATCH 14/26] fix: write table as tsv

---
 bin/createtable.py  | 4 ++--
 modules/local/qc.nf | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/createtable.py b/bin/createtable.py
index b14314d1..a4ab4845 100755
--- a/bin/createtable.py
+++ b/bin/createtable.py
@@ -57,8 +57,8 @@ def file2table():
         ]
         if col in df_columns
     ]
-
-    print(df[column_order].to_string(index=False, justify="left"))
+    df = df[column_order]
+    df.to_csv("qc_table.tsv", sep="\t", index=False)
 
 
 if __name__ == "__main__":
diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 576703fe..12978425 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -304,11 +304,11 @@ process QC_TABLE {
         path(qc_stats)
 
     output:
-        path("qc_table.txt"), emit: txt
+        path("qc_table.tsv"), emit: txt
 
     script:
     """
-    cat ${qc_stats.join(' ')} | createtable.py > qc_table.txt
+    cat ${qc_stats.join(' ')} | createtable.py
     """
 
     stub:

From ce264b783fb40c21ab7fa4aa7adadbd4813d81d6 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 21:09:10 -0400
Subject: [PATCH 15/26] feat: count # of peaks

---
 bin/count-peaks.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 bin/count-peaks.R

diff --git a/bin/count-peaks.R b/bin/count-peaks.R
new file mode 100644
index 00000000..5bc93385
--- /dev/null
+++ b/bin/count-peaks.R
@@ -0,0 +1,25 @@
+library(tidyverse)
+peak_counts <- read_tsv("peak_meta.tsv") %>%
+  group_by(sample_id, tool) %>%
+  count() %>%
+  rename(count_new = n)
+peak_counts %>%
+  pull(tool) %>%
+  unique()
+
+peaks_old <- read_tsv("old_peak_counts.tsv") %>%
+  mutate(tool = str_remove(file, "/.*")) %>%
+  mutate(
+    tool = case_when(
+      tool == "macsBroad" ~ "macs_broad",
+      tool == "macsNarrow" ~ "macs_narrow",
+      TRUE ~ tool
+    ),
+    sample_id = str_replace(file, ".*/(.*)/.*", "\\1"),
+  ) %>%
+  rename(count_old = count) %>%
+  select(sample_id, tool, count_old)
+
+inner_join(peaks_old, peak_counts) %>%
+  mutate(rel_diff_percent = round(100 * (count_new - count_old) / count_old, 2)) %>%
+  View()

From 2ea281163dd3e0a103186efb3e367ca77139e873 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Tue, 31 Oct 2023 21:10:44 -0400
Subject: [PATCH 16/26] fix: renamed qc_table.txt to tsv

---
 assets/multiqc_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 9bc0c5d2..2e921b59 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -169,7 +169,7 @@ custom_data:
 
 sp:
   QC_Table:
-    fn: "qc_table.txt"
+    fn: "qc_table.tsv"
   NGSQC_data:
     fn: "*NGSQC.txt"
   frip_samples:

From a55efcb6bdebf082defface235365b1ab74034d8 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 14:18:47 -0400
Subject: [PATCH 17/26] chore: update modules & submodules from remotes

---
 modules.json                                  | 32 +++++------
 .../nf-core/bedtools/getfasta/environment.yml |  6 +++
 modules/nf-core/bedtools/getfasta/main.nf     |  2 +-
 modules/nf-core/bwa/index/main.nf             | 53 -------------------
 modules/nf-core/bwa/index/meta.yml            | 45 ----------------
 modules/nf-core/bwa/index/tests/main.nf.test  | 33 ------------
 .../nf-core/bwa/index/tests/main.nf.test.snap | 43 ---------------
 modules/nf-core/bwa/index/tests/tags.yml      |  2 -
 subworkflows/CCBR/filter_blacklist/main.nf    |  7 +--
 9 files changed, 27 insertions(+), 196 deletions(-)
 create mode 100644 modules/nf-core/bedtools/getfasta/environment.yml
 delete mode 100644 modules/nf-core/bwa/index/main.nf
 delete mode 100644 modules/nf-core/bwa/index/meta.yml
 delete mode 100644 modules/nf-core/bwa/index/tests/main.nf.test
 delete mode 100644 modules/nf-core/bwa/index/tests/main.nf.test.snap
 delete mode 100644 modules/nf-core/bwa/index/tests/tags.yml

diff --git a/modules.json b/modules.json
index f0470703..3232c12b 100644
--- a/modules.json
+++ b/modules.json
@@ -12,8 +12,13 @@
                     },
                     "bwa/mem": {
                         "branch": "main",
-                        "git_sha": "ca4f84b4c2ca84eb0449b4ba414a8b8052f8d90a",
-                        "installed_by": ["filter_blacklist", "modules"]
+                        "git_sha": "7887b0e0dc5a0320d8ba84c2763ef8692c358087",
+                        "installed_by": ["modules", "filter_blacklist"]
+                    },
+                    "custom/countfastq": {
+                        "branch": "main",
+                        "git_sha": "2ccd43e3734de30fe61ed0ff80e6e3252929505e",
+                        "installed_by": ["filter_blacklist"]
                     },
                     "cutadapt": {
                         "branch": "main",
@@ -27,7 +32,7 @@
                     },
                     "picard/samtofastq": {
                         "branch": "main",
-                        "git_sha": "258d0f336ea1f851ab4223d295bb18b6dc187899",
+                        "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a",
                         "installed_by": ["filter_blacklist"]
                     },
                     "samtools/filteraligned": {
@@ -35,9 +40,14 @@
                         "git_sha": "879e969c593ab9f321301ac15722728ab30cea49",
                         "installed_by": ["filter_blacklist"]
                     },
+                    "samtools/flagstat": {
+                        "branch": "main",
+                        "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a",
+                        "installed_by": ["modules"]
+                    },
                     "samtools/sort": {
                         "branch": "main",
-                        "git_sha": "d55ab2580b69a81aa0534a3018cc6e6ea3b28640",
+                        "git_sha": "5b39869abfc740c6243d18a3cd84aa7d78787125",
                         "installed_by": ["modules"]
                     }
                 }
@@ -46,7 +56,7 @@
                 "CCBR": {
                     "filter_blacklist": {
                         "branch": "main",
-                        "git_sha": "bb7dbb42afe47d7e02b2f21e3352720ca2996e11",
+                        "git_sha": "b7764378fac18bea8c84f9dd39cb595241b6e796",
                         "installed_by": ["subworkflows"]
                     }
                 }
@@ -57,17 +67,7 @@
                 "nf-core": {
                     "bedtools/getfasta": {
                         "branch": "master",
-                        "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
-                        "installed_by": ["modules"]
-                    },
-                    "bwa/index": {
-                        "branch": "master",
-                        "git_sha": "28a23ea6529caff44855c774f439a4074883027c",
-                        "installed_by": ["modules"]
-                    },
-                    "samtools/flagstat": {
-                        "branch": "master",
-                        "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
+                        "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
                         "installed_by": ["modules"]
                     }
                 }
diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml
new file mode 100644
index 00000000..55ce727a
--- /dev/null
+++ b/modules/nf-core/bedtools/getfasta/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::bedtools=2.30.0
diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf
index 84adc4c9..53982e11 100644
--- a/modules/nf-core/bedtools/getfasta/main.nf
+++ b/modules/nf-core/bedtools/getfasta/main.nf
@@ -2,7 +2,7 @@ process BEDTOOLS_GETFASTA {
     tag "$bed"
     label 'process_single'
 
-    conda "bioconda::bedtools=2.30.0"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' :
         'biocontainers/bedtools:2.30.0--hc088bd4_0' }"
diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf
deleted file mode 100644
index c30d194d..00000000
--- a/modules/nf-core/bwa/index/main.nf
+++ /dev/null
@@ -1,53 +0,0 @@
-process BWA_INDEX {
-    tag "$fasta"
-    label 'process_single'
-
-    conda "bioconda::bwa=0.7.17"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' :
-        'biocontainers/bwa:0.7.17--hed695b0_7' }"
-
-    input:
-    tuple val(meta), path(fasta)
-
-    output:
-    tuple val(meta), path(bwa) , emit: index
-    path "versions.yml"        , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def prefix = task.ext.prefix ?: "${fasta.baseName}"
-    def args   = task.ext.args ?: ''
-    """
-    mkdir bwa
-    bwa \\
-        index \\
-        $args \\
-        -p bwa/${prefix} \\
-        $fasta
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${fasta.baseName}"
-    """
-    mkdir bwa
-
-    touch bwa/${prefix}.amb
-    touch bwa/${prefix}.ann
-    touch bwa/${prefix}.bwt
-    touch bwa/${prefix}.pac
-    touch bwa/${prefix}.sa
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml
deleted file mode 100644
index 730628d0..00000000
--- a/modules/nf-core/bwa/index/meta.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-name: bwa_index
-description: Create BWA index for reference genome
-keywords:
-  - index
-  - fasta
-  - genome
-  - reference
-tools:
-  - bwa:
-      description: |
-        BWA is a software package for mapping DNA sequences against
-        a large reference genome, such as the human genome.
-      homepage: http://bio-bwa.sourceforge.net/
-      documentation: http://www.htslib.org/doc/samtools.html
-      arxiv: arXiv:1303.3997
-      licence: ["GPL-3.0-or-later"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing reference information.
-        e.g. [ id:'test', single_end:false ]
-  - fasta:
-      type: file
-      description: Input genome fasta file
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing reference information.
-        e.g. [ id:'test', single_end:false ]
-  - index:
-      type: file
-      description: BWA genome index files
-      pattern: "*.{amb,ann,bwt,pac,sa}"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@drpatelh"
-  - "@maxulysse"
-maintainers:
-  - "@drpatelh"
-  - "@maxulysse"
diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test
deleted file mode 100644
index 2f33c0e8..00000000
--- a/modules/nf-core/bwa/index/tests/main.nf.test
+++ /dev/null
@@ -1,33 +0,0 @@
-nextflow_process {
-
-    name "Test Process BWA_INDEX"
-    tag "modules_nfcore"
-    tag "modules"
-    tag "bwa"
-    tag "bwa/index"
-    script "../main.nf"
-    process "BWA_INDEX"
-
-    test("BWA index") {
-
-        when {
-            process {
-                """
-                input[0] = [
-                    [id: 'test'],
-                    file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-                ]
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(process.out).match() }
-            )
-        }
-
-    }
-
-}
diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap
deleted file mode 100644
index 492d8f6a..00000000
--- a/modules/nf-core/bwa/index/tests/main.nf.test.snap
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-    "BWA index": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
-                            "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
-                            "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
-                            "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
-                            "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
-                        ]
-                    ]
-                ],
-                "1": [
-                    "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
-                ],
-                "index": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        [
-                            "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
-                            "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
-                            "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
-                            "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
-                            "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
-                        ]
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
-                ]
-            }
-        ],
-        "timestamp": "2023-10-17T17:20:20.180927714"
-    }
-}
diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml
deleted file mode 100644
index 28bb483c..00000000
--- a/modules/nf-core/bwa/index/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-bwa/index:
-  - modules/nf-core/bwa/index/**
diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf
index 3438b260..4c11dc7e 100644
--- a/subworkflows/CCBR/filter_blacklist/main.nf
+++ b/subworkflows/CCBR/filter_blacklist/main.nf
@@ -21,11 +21,12 @@ workflow FILTER_BLACKLIST {
         ch_versions = ch_versions.mix(
             BWA_MEM.out.versions,
             SAMTOOLS_FILTERALIGNED.out.versions,
-            PICARD_SAMTOFASTQ.out.versions
+            PICARD_SAMTOFASTQ.out.versions,
+            CUSTOM_COUNTFASTQ.out.versions
         )
 
     emit:
-        reads =  PICARD_SAMTOFASTQ.out.paired  // channel: [ val(meta), path(fastq) ]
+        reads             = PICARD_SAMTOFASTQ.out.paired  // channel: [ val(meta), path(fastq) ]
         n_surviving_reads = CUSTOM_COUNTFASTQ.out.count
-        versions = ch_versions           // channel: [ path(versions.yml) ]
+        versions          = ch_versions           // channel: [ path(versions.yml) ]
 }

From 690415296aed828fd1dd1d1224a134b7d00c688e Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 14:21:47 -0400
Subject: [PATCH 18/26] fix(cli): only set -profile flag if values aren't empty

---
 src/util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/util.py b/src/util.py
index c4e79851..42b860c8 100644
--- a/src/util.py
+++ b/src/util.py
@@ -178,7 +178,8 @@ def run_nextflow(
         profiles.add("slurm")
     if hpc:
         profiles.add(hpc_options[hpc]["profile"])
-    args_dict["-profile"] = ",".join(sorted(profiles))
+    if profiles:
+        args_dict["-profile"] = ",".join(sorted(profiles))
     nextflow_command += list(f"{k} {v}" for k, v in args_dict.items())
 
     # Print nextflow command

From 332b7bafd6a2b1af21c8f750534a04e8e5ea84fc Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 14:25:34 -0400
Subject: [PATCH 19/26] ci: fix typo in config filename

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4314b5ab..48d48bf0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,7 +44,7 @@ jobs:
           cd tests/cli
           which champagne
           champagne init
-          champagne run -stub -c ci_stub
+          champagne run -stub -c ci_stub.config
       - name: Test run
         if: ${{ env.test_run == 'true' }}
         run: |

From 798d50bca89a4c84fd22accef7eb966e819b851f Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 14:36:24 -0400
Subject: [PATCH 20/26] fix: update countfastq module from remote

---
 modules/CCBR/custom/countfastq/main.nf        |  8 ++++--
 modules/CCBR/custom/countfastq/meta.yml       | 26 ++++++++++---------
 .../countfastq/templates/count-fastq.py       | 12 +++++++--
 3 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf
index ddaf682b..18df8c37 100644
--- a/modules/CCBR/custom/countfastq/main.nf
+++ b/modules/CCBR/custom/countfastq/main.nf
@@ -10,14 +10,18 @@ process CUSTOM_COUNTFASTQ {
 
     output:
         tuple val(meta), path("*.txt"), emit: count
+        path('versions.yml'),           emit: versions
+
+    when:
+        task.ext.when == null || task.ext.when
 
     script:
-    def txt_filename = "${meta.baseName}.txt"
     template 'count-fastq.py'
 
     stub:
     """
     count=-1
-    echo \$count
+    echo \$count > ${meta.id}.count.txt
+    touch versions.yml
     """
 }
diff --git a/modules/CCBR/custom/countfastq/meta.yml b/modules/CCBR/custom/countfastq/meta.yml
index b3c0c13d..0e1cda48 100644
--- a/modules/CCBR/custom/countfastq/meta.yml
+++ b/modules/CCBR/custom/countfastq/meta.yml
@@ -1,19 +1,17 @@
 name: custom_countfastq
 description: |
   Count reads in a fastq file
-
 keywords:
   - fastq
+  - biopython
+  - python
 tools:
-  - grep:
-    description: |
-      a command-line utility for searching plain-text data sets for lines that match a regular expression
-  - wc:
-    description: |
-      a command-line utility for counting newlines, words, and bytes in plain-text data
-  - zcat:
-    description: |
-      a command-line utility for viewing compressed file content as plain text
+  - Biopython:
+      description: |
+        Python tools for computational molecular biology
+      homepage: https://biopython.org/
+      tool_dev_url: https://github.com/biopython/biopython
+      doi: 10.1093/bioinformatics/btp163
 input:
   - meta:
       type: map
@@ -31,8 +29,12 @@ output:
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - count:
-      type: env
-      description: Number of reads in the fastq files
+      type: file
+      description: Plain text file containing the number of reads in the fastq files
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
 authors:
   - "@kelly-sovacool"
 maintainers:
diff --git a/modules/CCBR/custom/countfastq/templates/count-fastq.py b/modules/CCBR/custom/countfastq/templates/count-fastq.py
index d83ee8fa..1f89b1ae 100644
--- a/modules/CCBR/custom/countfastq/templates/count-fastq.py
+++ b/modules/CCBR/custom/countfastq/templates/count-fastq.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import Bio.SeqIO
 import gzip
-import os
+import platform
 
 
 def main():
@@ -15,5 +15,13 @@ def main():
     return count
 
 
+def write_versions():
+    with open("versions.yml", "w") as outfile:
+        outfile.write('"${task.process}":\\n')
+        outfile.write(f'  Python: "{platform.python_version()}"\\n')
+        outfile.write(f'  Biopython: "{Bio.__version__}"\\n')
+
+
 if __name__ == "__main__":
-    print(main())
+    write_versions()
+    main()

From a72f2e3c1746014fa60c69518b45ac7edcd710b1 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 14:42:42 -0400
Subject: [PATCH 21/26] ci: fix test/stub config paths

---
 tests/cli/ci_stub.config | 18 +++++++++---------
 tests/cli/ci_test.config |  8 ++++----
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/cli/ci_stub.config b/tests/cli/ci_stub.config
index 2ba83f49..baaf26ee 100644
--- a/tests/cli/ci_stub.config
+++ b/tests/cli/ci_stub.config
@@ -3,7 +3,7 @@ params {
     config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir'
 
     outdir = 'results/test'
-    input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
+    input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
     genome = 'test'
 
     max_cpus = 2        // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
@@ -13,19 +13,19 @@ params {
     publish_dir_mode = "symlink"
 
     // CCBR shared resource paths
-    index_dir = "data"
+    index_dir = "../data"
     fastq_screen {
-        conf = "assets/fastq_screen_ci.conf"
-        db_dir = "data/fastq_screen_db"
+        conf = "fastq_screen_ci.conf"
+        db_dir = "../data/fastq_screen_db"
     }
     genomes {
         'test' { // blank files for testing stubs on GitHub Actions
-            blacklist_index = "data/test.blacklist"
-            reference_index = "data/test/*"
+            blacklist_index = "${params.index_dir}/test.blacklist"
+            reference_index = "${params.index_dir}/test/*"
             effective_genome_size = 2700000000
-            chrom_sizes = "data/test.fa.sizes"
-            gene_info = "data/geneinfo.bed"
-            chromosomes_dir = "data/chroms/"
+            chrom_sizes = "${params.index_dir}/test.fa.sizes"
+            gene_info = "${params.index_dir}/geneinfo.bed"
+            chromosomes_dir = "${params.index_dir}/chroms/"
         }
     }
     sicer {
diff --git a/tests/cli/ci_test.config b/tests/cli/ci_test.config
index f262c176..ef47eaff 100644
--- a/tests/cli/ci_test.config
+++ b/tests/cli/ci_test.config
@@ -3,7 +3,7 @@ params {
     config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir'
 
     outdir = 'results/test'
-    input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
+    input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
 
     genome = 'custom_genome'
     read_length = 50
@@ -11,8 +11,8 @@ params {
     // Genome references
     genome_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa'
     genes_gtf   = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf'
-    blacklist = 'data/test.blacklist'
-    rename_contigs = 'assets/R64-1-1_ensembl2UCSC.txt'
+    blacklist = '../data/test.blacklist'
+    rename_contigs = "${projectDir}/assets/R64-1-1_ensembl2UCSC.txt" // double-quoted so ${projectDir} is interpolated; single quotes would keep it as literal text
 
 
     max_cpus = 2        // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
@@ -22,7 +22,7 @@ params {
     publish_dir_mode = "symlink"
 
     // CCBR shared resource paths
-    index_dir = "data"
+    index_dir = "../data"
     fastq_screen = null
     sicer.species = "sacCer1" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py
 

From 778cdf3815d9833b9aecd353d681722b5ebe4160 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 15:39:57 -0400
Subject: [PATCH 22/26] fix: need double quotes for strings w/ nf variables

---
 tests/cli/ci_stub.config | 2 +-
 tests/cli/ci_test.config | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/cli/ci_stub.config b/tests/cli/ci_stub.config
index baaf26ee..317a28aa 100644
--- a/tests/cli/ci_stub.config
+++ b/tests/cli/ci_stub.config
@@ -3,7 +3,7 @@ params {
     config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir'
 
     outdir = 'results/test'
-    input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
+    input = "${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
     genome = 'test'
 
     max_cpus = 2        // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
diff --git a/tests/cli/ci_test.config b/tests/cli/ci_test.config
index ef47eaff..df6d9378 100644
--- a/tests/cli/ci_test.config
+++ b/tests/cli/ci_test.config
@@ -3,7 +3,7 @@ params {
     config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir'
 
     outdir = 'results/test'
-    input = '${projectDir}/assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
+    input = "${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv
 
     genome = 'custom_genome'
     read_length = 50

From 10f4e4442a698fef0cf1b947ebddb0865056c044 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 22:39:29 -0400
Subject: [PATCH 23/26] ci: override max cpus on CLI

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 48d48bf0..0712d734 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,7 +44,7 @@ jobs:
           cd tests/cli
           which champagne
           champagne init
-          champagne run -stub -c ci_stub.config
+          champagne run -stub -c ci_stub.config --max_cpus 2
       - name: Test run
         if: ${{ env.test_run == 'true' }}
         run: |

From cb713db22575d8a4774340ffc6706afca755efbd Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 22:55:12 -0400
Subject: [PATCH 24/26] ci: override max memory

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0712d734..72e4e77c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,7 +44,7 @@ jobs:
           cd tests/cli
           which champagne
           champagne init
-          champagne run -stub -c ci_stub.config --max_cpus 2
+          champagne run -stub -c ci_stub.config --max_cpus 2 --max_memory 6.GB
       - name: Test run
         if: ${{ env.test_run == 'true' }}
         run: |

From 35c8bf2ae988d63bb10e6eb3be34af71ffc2c8a3 Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 23:10:00 -0400
Subject: [PATCH 25/26] fix: use bam.baseName for output files

---
 modules/CCBR/samtools/flagstat/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/CCBR/samtools/flagstat/main.nf b/modules/CCBR/samtools/flagstat/main.nf
index 98f8e5df..02336a36 100644
--- a/modules/CCBR/samtools/flagstat/main.nf
+++ b/modules/CCBR/samtools/flagstat/main.nf
@@ -34,7 +34,7 @@ process SAMTOOLS_FLAGSTAT {
     """
 
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def prefix = task.ext.prefix ?: "${bam.baseName}"
     """
     touch ${prefix}.flagstat
 

From 99da78ea7b0d0dd796bb3542170024382fb7ddca Mon Sep 17 00:00:00 2001
From: Kelly Sovacool <kelly.sovacool@nih.gov>
Date: Wed, 1 Nov 2023 23:10:11 -0400
Subject: [PATCH 26/26] fix: stub output filename

---
 modules/local/qc.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/qc.nf b/modules/local/qc.nf
index 12978425..6a4ac89f 100644
--- a/modules/local/qc.nf
+++ b/modules/local/qc.nf
@@ -313,7 +313,7 @@ process QC_TABLE {
 
     stub:
     """
-    touch qc_table.txt
+    touch qc_table.tsv
     """
 
 }