diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index de1e513e..72e4e77c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,16 +41,16 @@ jobs: pip install .[dev,test] - name: Stub run run: | - cd tests/ + cd tests/cli which champagne champagne init - champagne run -profile ci_stub -stub + champagne run -stub -c ci_stub.config --max_cpus 2 --max_memory 6.GB - name: Test run if: ${{ env.test_run == 'true' }} run: | - cd tests/ + cd tests/cli champagne init - champagne run -profile ci_test,docker + champagne run -profile docker -c ci_test.config - name: "Upload Artifact" uses: actions/upload-artifact@v3 if: always() # run even if previous steps fail diff --git a/.gitignore b/.gitignore index 1905fa74..b504e6de 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ replay_pid* /work*/ /data/ /results/ +/output/ /params.yaml # python packaging diff --git a/CHANGELOG.md b/CHANGELOG.md index cbb08c8e..5977ad40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## development version +- Fixed a bug in QC stats that mixed up the statistics for different samples. (#125) +- Fixed a bug in the CLI that added the `-profile` to the nextflow command even if it wasn't needed (#125). +- Report read counts between blacklist & filtering steps in the QC table. (#125) - Run spooker on workflow completion (#126). 
## CHAMPAGNE 0.2.0 diff --git a/VERSION b/VERSION index 70426f85..0c62199f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0-dev +0.2.1 diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 163ca5b4..2e921b59 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -78,13 +78,19 @@ custom_data: NReads: description: "The number of reads sequenced" format: "{:,.0f}" - hidden: True + N_reads_surviving_blacklist: + description: "The number of reads surviving after filtering blacklisted regions" + format: "{:,.0f}" + hidden: true NMappedReads: description: "The number of reads mapped" format: "{:,.0f}" - hidden: True + N_mapped_reads_surviving_filter: + description: "The number of mapped reads surviving after filtering by alignment quality" + format: "{:,.0f}" + hidden: true NUniqMappedReads: - description: "The number of reads remaining after deduplication" + description: "The number of mapped & filtered reads remaining after deduplication" format: "{:,.0f}" NRF: description: "Non-Redundant fraction" @@ -163,7 +169,7 @@ custom_data: sp: QC_Table: - fn: "qc_table.txt" + fn: "qc_table.tsv" NGSQC_data: fn: "*NGSQC.txt" frip_samples: diff --git a/bin/compare-tables.R b/bin/compare-tables.R new file mode 100644 index 00000000..d8214588 --- /dev/null +++ b/bin/compare-tables.R @@ -0,0 +1,15 @@ +library(tidyverse) + +original <- read.table("QCTable.txt", header = TRUE) %>% + as_tibble() %>% + mutate(across(contains("reads"), as.integer)) %>% + select(c("SampleName", contains("reads"))) %>% + pivot_longer(-SampleName, values_to = "value_orig") +new <- read_tsv("qc_table.tsv") %>% + select(SampleName, original %>% pull(name)) %>% + pivot_longer(-SampleName, values_to = "value_new") + + +inner_join(original, new) %>% + mutate(rel_diff_percent = round(100 * (value_new - value_orig) / value_orig, 2)) %>% + View() diff --git a/bin/count-peaks.R b/bin/count-peaks.R new file mode 100644 index 00000000..5bc93385 --- /dev/null +++ 
b/bin/count-peaks.R @@ -0,0 +1,25 @@ +library(tidyverse) +peak_counts <- read_tsv("peak_meta.tsv") %>% + group_by(sample_id, tool) %>% + count() %>% + rename(count_new = n) +peak_counts %>% + pull(tool) %>% + unique() + +peaks_old <- read_tsv("old_peak_counts.tsv") %>% + mutate(tool = str_remove(file, "/.*")) %>% + mutate( + tool = case_when( + tool == "macsBroad" ~ "macs_broad", + tool == "macsNarrow" ~ "macs_narrow", + TRUE ~ tool + ), + sample_id = str_replace(file, ".*/(.*)/.*", "\\1"), + ) %>% + rename(count_old = count) %>% + select(sample_id, tool, count_old) + +inner_join(peaks_old, peak_counts) %>% + mutate(rel_diff_percent = round(100 * (count_new - count_old) / count_old, 2)) %>% + View() diff --git a/bin/createtable.py b/bin/createtable.py index e0d730da..a4ab4845 100755 --- a/bin/createtable.py +++ b/bin/createtable.py @@ -33,10 +33,7 @@ def file2table(): df = pd.DataFrame(tabledict) df.index.name = "SampleName" df.reset_index(inplace=True) - # print(df[['NSC', 'FRiP', 'PCB1', 'PCB2', 'RSC']]) #re-order columns - # cols = df.columns.tolist() # view df columns names - # orderedcols = ordercolumns(cols) - # print(df.to_string()) + df = df.sort_values(by="SampleName") # sometimes preseq fails, resulting in some columns not being present. # so this only keeps columns that exist in the dict. 
@@ -46,7 +43,9 @@ def file2table(): for col in [ "SampleName", "NReads", + "N_reads_surviving_blacklist", "NMappedReads", + "N_mapped_reads_surviving_filter", "NUniqMappedReads", "NRF", "PBC1", @@ -58,8 +57,8 @@ def file2table(): ] if col in df_columns ] - - print(df[column_order].to_string(index=False, justify="left")) + df = df[column_order] + df.to_csv("qc_table.tsv", sep="\t", index=False) if __name__ == "__main__": diff --git a/bin/filterMetrics.py b/bin/filterMetrics.py index 5007132f..38a7fe34 100755 --- a/bin/filterMetrics.py +++ b/bin/filterMetrics.py @@ -64,6 +64,8 @@ def getmetadata(type): metadata = "NReads" elif type == "mnreads": metadata = "NMappedReads" + elif type == "N_mapped_reads_surviving_filter": + metadata = type elif type == "unreads": metadata = "NUniqMappedReads" elif type == "fragLen": diff --git a/conf/base.config b/conf/base.config index 6c7a4a17..c5251257 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -32,19 +31,19 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } + cpus = { check_max( 16 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 32 * task.attempt, 'cpus' ) } + 
memory = { check_max( 120.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } @@ -59,15 +58,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - /* - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - }*/ - - // Custom CCBR resource requirements - withLabel:process_higher { - cpus = { check_max( 32 * task.attempt, 'cpus' ) } - memory = { check_max( 120.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } } diff --git a/conf/modules.config b/conf/modules.config index 2496842b..536a55f6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -8,20 +8,21 @@ process { errorStrategy = 'finish' - withName: 'INPUT_CHECK:SAMPLESHEET_CHECK' { + /* + withName: '.*CUSTOM_DUMPSOFTWAREVERSIONS' { + cache = false + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + }*/ + + withName: '.*INPUT_CHECK:SAMPLESHEET_CHECK' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - withName: 'CUTADAPT' { - ext.args = [ - '--nextseq-trim=2', - '--trim-n -n 5 -O 5', - '-q 10,10', - '-m 20', - '-b file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa' - ].join(' ').trim() - } } diff --git a/conf/test.config b/conf/test.config index 8a1d203c..f1d813c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,6 +18,7 @@ params { deeptools.excluded_chroms = 'chrM' run { qc = true + deeptools = true normalize_input = true call_peaks = true gem = true diff --git a/main.nf b/main.nf index 696ec0af..40290618 100644 --- a/main.nf +++ b/main.nf @@ -49,6 +49,10 @@ workflow MAKE_REFERENCE { // MAIN WORKFLOW workflow { + CHIPSEQ() +} + +workflow CHIPSEQ { INPUT_CHECK(file(params.input), params.seq_center) INPUT_CHECK.out.reads.set { raw_fastqs } raw_fastqs | CUTADAPT @@ -72,8 +76,8 @@ workflow { ch_multiqc = Channel.of() if (params.run.qc) { - QC(raw_fastqs, trimmed_fastqs, - aligned_bam, ALIGN_GENOME.out.flagstat, + QC(raw_fastqs, trimmed_fastqs, FILTER_BLACKLIST.out.n_surviving_reads, + aligned_bam, ALIGN_GENOME.out.aligned_flagstat, ALIGN_GENOME.out.filtered_flagstat, deduped_bam, DEDUPLICATE.out.flagstat, PHANTOM_PEAKS.out.spp, frag_lengths, PREPARE_GENOME.out.gene_info, diff --git a/modules.json b/modules.json index f0470703..3232c12b 100644 --- a/modules.json +++ b/modules.json @@ -12,8 +12,13 @@ }, "bwa/mem": { "branch": "main", - "git_sha": "ca4f84b4c2ca84eb0449b4ba414a8b8052f8d90a", - "installed_by": ["filter_blacklist", "modules"] + "git_sha": "7887b0e0dc5a0320d8ba84c2763ef8692c358087", + "installed_by": ["modules", "filter_blacklist"] + }, + "custom/countfastq": { + "branch": "main", + "git_sha": "2ccd43e3734de30fe61ed0ff80e6e3252929505e", + "installed_by": ["filter_blacklist"] }, "cutadapt": { "branch": "main", @@ -27,7 +32,7 @@ }, "picard/samtofastq": { "branch": "main", - "git_sha": "258d0f336ea1f851ab4223d295bb18b6dc187899", + "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a", "installed_by": 
["filter_blacklist"] }, "samtools/filteraligned": { @@ -35,9 +40,14 @@ "git_sha": "879e969c593ab9f321301ac15722728ab30cea49", "installed_by": ["filter_blacklist"] }, + "samtools/flagstat": { + "branch": "main", + "git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a", + "installed_by": ["modules"] + }, "samtools/sort": { "branch": "main", - "git_sha": "d55ab2580b69a81aa0534a3018cc6e6ea3b28640", + "git_sha": "5b39869abfc740c6243d18a3cd84aa7d78787125", "installed_by": ["modules"] } } @@ -46,7 +56,7 @@ "CCBR": { "filter_blacklist": { "branch": "main", - "git_sha": "bb7dbb42afe47d7e02b2f21e3352720ca2996e11", + "git_sha": "b7764378fac18bea8c84f9dd39cb595241b6e796", "installed_by": ["subworkflows"] } } @@ -57,17 +67,7 @@ "nf-core": { "bedtools/getfasta": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": ["modules"] - }, - "bwa/index": { - "branch": "master", - "git_sha": "28a23ea6529caff44855c774f439a4074883027c", - "installed_by": ["modules"] - }, - "samtools/flagstat": { - "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] } } diff --git a/modules/CCBR/bwa/mem/main.nf b/modules/CCBR/bwa/mem/main.nf index fe70049f..e9fa59f8 100644 --- a/modules/CCBR/bwa/mem/main.nf +++ b/modules/CCBR/bwa/mem/main.nf @@ -1,6 +1,6 @@ process BWA_MEM { tag { meta.id } - label 'process_higher' + label 'process_high' container 'nciccbr/ccbr_ubuntu_base_20.04:v5' diff --git a/modules/CCBR/custom/countfastq/main.nf b/modules/CCBR/custom/countfastq/main.nf new file mode 100644 index 00000000..18df8c37 --- /dev/null +++ b/modules/CCBR/custom/countfastq/main.nf @@ -0,0 +1,27 @@ + +process CUSTOM_COUNTFASTQ { + tag { meta.id } + label 'process_single' + + container 'nciccbr/ccbr_ubuntu_base_20.04:v6.1' + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.txt"), emit: count + path('versions.yml'), emit: versions + + 
when: + task.ext.when == null || task.ext.when + + script: + template 'count-fastq.py' + + stub: + """ + count=-1 + echo \$count > ${meta.id}.count.txt + touch versions.yml + """ +} diff --git a/modules/CCBR/custom/countfastq/meta.yml b/modules/CCBR/custom/countfastq/meta.yml new file mode 100644 index 00000000..0e1cda48 --- /dev/null +++ b/modules/CCBR/custom/countfastq/meta.yml @@ -0,0 +1,41 @@ +name: custom_countfastq +description: | + Count reads in a fastq file +keywords: + - fastq + - biopython + - python +tools: + - Biopython: + description: | + Python tools for computational molecular biology + homepage: https://biopython.org/ + tool_dev_url: https://github.com/biopython/biopython + doi: 10.1093/bioinformatics/btp163 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: fastq file + pattern: "*.{fastq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - count: + type: file + description: Plain text file containing the number of reads in the fastq files + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kelly-sovacool" +maintainers: + - "@kelly-sovacool" diff --git a/modules/CCBR/custom/countfastq/templates/count-fastq.py b/modules/CCBR/custom/countfastq/templates/count-fastq.py new file mode 100644 index 00000000..1f89b1ae --- /dev/null +++ b/modules/CCBR/custom/countfastq/templates/count-fastq.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +import Bio.SeqIO +import gzip +import platform + + +def main(): + count = 0 + for fastq_filename in "${fastq}".split(): + with gzip.open(fastq_filename, "rt") as file_handle: + n_seqs = sum(1 for rec in Bio.SeqIO.parse(file_handle, "fastq")) + count += n_seqs + with open("${meta.id}.count.txt", "w") as out_file: + out_file.write(str(count)) + return count + + +def write_versions(): + with open("versions.yml", "w") as outfile: + outfile.write('"${task.process}":\\n') + outfile.write(f' Python: "{platform.python_version()}"\\n') + outfile.write(f' Biopython: "{Bio.__version__}"\\n') + + +if __name__ == "__main__": + write_versions() + main() diff --git a/modules/CCBR/picard/samtofastq/main.nf b/modules/CCBR/picard/samtofastq/main.nf index 644f1e4d..13c1f01b 100644 --- a/modules/CCBR/picard/samtofastq/main.nf +++ b/modules/CCBR/picard/samtofastq/main.nf @@ -8,9 +8,10 @@ process PICARD_SAMTOFASTQ { tuple val(meta), path(bam) output: - tuple val(meta), path("*_?.fastq.gz"), emit: reads - path "versions.yml", emit: versions + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*_?.fastq.gz"), emit: paired, optional: true tuple val(meta), path("*unpaired.fastq.gz"), emit: unpaired, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/samtools/flagstat/main.nf 
b/modules/CCBR/samtools/flagstat/main.nf similarity index 91% rename from modules/nf-core/samtools/flagstat/main.nf rename to modules/CCBR/samtools/flagstat/main.nf index b75707ec..02336a36 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/CCBR/samtools/flagstat/main.nf @@ -19,7 +19,7 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${bam.baseName}" """ samtools \\ flagstat \\ @@ -34,7 +34,7 @@ process SAMTOOLS_FLAGSTAT { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${bam.baseName}" """ touch ${prefix}.flagstat diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/CCBR/samtools/flagstat/meta.yml similarity index 100% rename from modules/nf-core/samtools/flagstat/meta.yml rename to modules/CCBR/samtools/flagstat/meta.yml diff --git a/modules/CCBR/samtools/sort/main.nf b/modules/CCBR/samtools/sort/main.nf index 359b871d..5cf9e8d5 100644 --- a/modules/CCBR/samtools/sort/main.nf +++ b/modules/CCBR/samtools/sort/main.nf @@ -1,6 +1,6 @@ process SAMTOOLS_SORT { tag { meta.id } - label 'process_medium' + label 'process_high' container 'nciccbr/ccbr_ubuntu_base_20.04:v6' diff --git a/modules/local/deduplicate.nf b/modules/local/deduplicate.nf index 7d574a7a..5153ff19 100644 --- a/modules/local/deduplicate.nf +++ b/modules/local/deduplicate.nf @@ -44,7 +44,7 @@ process MACS2_DEDUP { process PICARD_DEDUP { tag { meta.id } label 'dedup' - label 'process_higher' + label 'process_high' container "${params.containers.picard}" diff --git a/modules/local/deeptools.nf b/modules/local/deeptools.nf index 42bd5a48..3a5db905 100644 --- a/modules/local/deeptools.nf +++ b/modules/local/deeptools.nf @@ -3,7 +3,7 @@ process BAM_COVERAGE { tag { meta.id } label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -125,7 +125,7 @@ process PLOT_PCA { 
process PLOT_FINGERPRINT { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -187,7 +187,7 @@ process BED_PROTEIN_CODING { process COMPUTE_MATRIX { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" @@ -303,7 +303,7 @@ process PLOT_PROFILE { process NORMALIZE_INPUT { label 'qc' label 'deeptools' - label 'process_higher' + label 'process_high' container = "${params.containers.deeptools}" diff --git a/modules/local/qc.nf b/modules/local/qc.nf index 8ab9f39b..6a4ac89f 100644 --- a/modules/local/qc.nf +++ b/modules/local/qc.nf @@ -2,7 +2,7 @@ process FASTQC { tag { meta.id } label 'qc' - label 'process_higher' + label 'process_high' publishDir "${params.outdir}/qc/fastqc_${fqtype}/${meta.id}", mode: "${params.publish_dir_mode}" container = "${params.containers.fastqc}" @@ -100,7 +100,7 @@ process HANDLE_PRESEQ_ERROR { tuple val(meta), val(log) output: - path("*nrf.txt"), emit: nrf + tuple val(meta), path("*nrf.txt"), emit: nrf script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -123,7 +123,7 @@ process PARSE_PRESEQ_LOG { tuple val(meta), path(log) output: - path("*nrf.txt"), emit: nrf + tuple val(meta), path("*nrf.txt"), emit: nrf script: def prefix = task.ext.prefix ?: "${meta.id}" @@ -150,7 +150,7 @@ process PHANTOM_PEAKS { output: path("${meta.id}.ppqt.pdf") , emit: pdf - path("${meta.id}.spp.out") , emit: spp + tuple val(meta), path("${meta.id}.spp.out") , emit: spp tuple val(meta), path("${meta.id}.fraglen.txt"), emit: fraglen path "versions.yml" , emit: versions @@ -251,32 +251,39 @@ process QC_STATS { container = "${params.containers.base}" input: - tuple val(meta), path(raw_fastq) - tuple val(meta), path(align_flagstat) - tuple path(dedup_flagstat), path(idxstat) - path(preseq_nrf) - path(ppqt_spp) - tuple val(meta), val(fraglen) + tuple val(meta), path(raw_fastq), path(count_file_blacklist), 
path(aligned_flagstat), path(filtered_flagstat), path(dedup_flagstat), path(idxstat), path(preseq_nrf), path(ppqt_spp), val(fraglen) output: path("${meta.id}.qc_stats.txt") script: - // TODO: handle paired reads def outfile = "${meta.id}.qc_stats.txt" """ touch ${outfile} + # Number of reads zcat ${raw_fastq} | wc -l | filterMetrics.py ${meta.id} tnreads >> ${outfile} + + # Number of reads after blacklist filter + n_reads_after_blacklist=`cat ${count_file_blacklist}` + echo -e "${meta.id}\\tN_reads_surviving_blacklist\\t\${n_reads_after_blacklist}" >> ${outfile} + # Number of mapped reads - grep 'mapped (' ${align_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile} + grep 'mapped (' ${aligned_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} mnreads >> ${outfile} + + # Number of mapped reads surviving filter + grep 'mapped (' ${filtered_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} N_mapped_reads_surviving_filter >> ${outfile} + # Number of uniquely mapped reads grep 'mapped (' ${dedup_flagstat} | awk '{{print \$1,\$3}}' | filterMetrics.py ${meta.id} unreads >> ${outfile} + # NRF, PCB1, PCB2 cat ${preseq_nrf} | filterMetrics.py ${meta.id} nrf >> ${outfile} + # NSC, RSC, Qtag awk '{{print \$(NF-2),\$(NF-1),\$NF}}' ${ppqt_spp} | filterMetrics.py ${meta.id} ppqt >> ${outfile} + # Fragment Length echo "${meta.id}\tFragmentLength\t${fraglen}" >> ${outfile} """ @@ -297,16 +304,16 @@ process QC_TABLE { path(qc_stats) output: - path("qc_table.txt"), emit: txt + path("qc_table.tsv"), emit: txt script: """ - cat ${qc_stats.join(' ')} | createtable.py > qc_table.txt + cat ${qc_stats.join(' ')} | createtable.py """ stub: """ - touch qc_table.txt + touch qc_table.tsv """ } diff --git a/modules/local/samtools_index.nf b/modules/local/samtools_index.nf index 997041d3..d4848313 100644 --- a/modules/local/samtools_index.nf +++ b/modules/local/samtools_index.nf @@ -1,6 +1,6 @@ -process SAMTOOLS_INDEX { +process 
SAMTOOLS_INDEX { // TODO create/use flagstat & idxstat module in nf-modules tag { meta.id } - label 'process_medium' + label 'process_high' container = "${params.containers.base}" @@ -9,7 +9,7 @@ process SAMTOOLS_INDEX { output: tuple val(meta), path("${bam.baseName}.sort.bam"), path("${bam.baseName}.sort.bam.bai"), emit: bam - tuple path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat + tuple val(meta), path("${bam.baseName}.sort.bam.flagstat"), path("${bam.baseName}.sort.bam.idxstat"), emit: flagstat script: """ diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml new file mode 100644 index 00000000..55ce727a --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.30.0 diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf index 84adc4c9..53982e11 100644 --- a/modules/nf-core/bedtools/getfasta/main.nf +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -2,7 +2,7 @@ process BEDTOOLS_GETFASTA { tag "$bed" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf deleted file mode 100644 index c30d194d..00000000 --- a/modules/nf-core/bwa/index/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process BWA_INDEX { - tag "$fasta" - label 'process_single' - - conda "bioconda::bwa=0.7.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'biocontainers/bwa:0.7.17--hed695b0_7' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path(bwa) , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${fasta.baseName}" - def args = task.ext.args ?: '' - """ - mkdir bwa - bwa \\ - index \\ - $args \\ - -p bwa/${prefix} \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${fasta.baseName}" - """ - mkdir bwa - - touch bwa/${prefix}.amb - touch bwa/${prefix}.ann - touch bwa/${prefix}.bwt - touch bwa/${prefix}.pac - touch bwa/${prefix}.sa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml deleted file mode 100644 index 730628d0..00000000 --- a/modules/nf-core/bwa/index/meta.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: bwa_index -description: Create BWA index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input genome fasta file -output: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. 
[ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "*.{amb,ann,bwt,pac,sa}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@maxulysse" -maintainers: - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test deleted file mode 100644 index 2f33c0e8..00000000 --- a/modules/nf-core/bwa/index/tests/main.nf.test +++ /dev/null @@ -1,33 +0,0 @@ -nextflow_process { - - name "Test Process BWA_INDEX" - tag "modules_nfcore" - tag "modules" - tag "bwa" - tag "bwa/index" - script "../main.nf" - process "BWA_INDEX" - - test("BWA index") { - - when { - process { - """ - input[0] = [ - [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap deleted file mode 100644 index 492d8f6a..00000000 --- a/modules/nf-core/bwa/index/tests/main.nf.test.snap +++ /dev/null @@ -1,43 +0,0 @@ -{ - "BWA index": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - [ - "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", - "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", - "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", - "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", - "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" - ] - ] - ], - "1": [ - "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" - ], - "index": [ - [ - { - "id": "test" - }, - [ - "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", - "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", - "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", - "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", - 
"genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" - ] - ] - ], - "versions": [ - "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" - ] - } - ], - "timestamp": "2023-10-17T17:20:20.180927714" - } -} diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml deleted file mode 100644 index 28bb483c..00000000 --- a/modules/nf-core/bwa/index/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bwa/index: - - modules/nf-core/bwa/index/** diff --git a/nextflow.config b/nextflow.config index 7c74fe0c..34f9a3c2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,7 @@ params { run { // some steps can be turned on/off for debugging purposes qc = true + deeptools = true normalize_input = true call_peaks = true gem = true @@ -112,12 +113,6 @@ profiles { full_mm10 { includeConfig "conf/full_mm10.config" } - ci_stub { - includeConfig "conf/ci_stub.config" - } - ci_test { - includeConfig "conf/ci_test.config" - } } includeConfig 'conf/genomes.config' diff --git a/src/util.py b/src/util.py index c4e79851..42b860c8 100644 --- a/src/util.py +++ b/src/util.py @@ -178,7 +178,8 @@ def run_nextflow( profiles.add("slurm") if hpc: profiles.add(hpc_options[hpc]["profile"]) - args_dict["-profile"] = ",".join(sorted(profiles)) + if profiles: + args_dict["-profile"] = ",".join(sorted(profiles)) nextflow_command += list(f"{k} {v}" for k, v in args_dict.items()) # Print nextflow command diff --git a/subworkflows/CCBR/filter_blacklist/main.nf b/subworkflows/CCBR/filter_blacklist/main.nf index 14494899..4c11dc7e 100644 --- a/subworkflows/CCBR/filter_blacklist/main.nf +++ b/subworkflows/CCBR/filter_blacklist/main.nf @@ -1,8 +1,9 @@ -include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' +include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned' -include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq' +include { PICARD_SAMTOFASTQ } from 
'../../../modules/CCBR/picard/samtofastq' +include { CUSTOM_COUNTFASTQ } from '../../../modules/CCBR/custom/countfastq' workflow FILTER_BLACKLIST { take: @@ -15,14 +16,17 @@ workflow FILTER_BLACKLIST { BWA_MEM ( ch_fastq_input, ch_blacklist_index ) SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam ) PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam ) + CUSTOM_COUNTFASTQ( PICARD_SAMTOFASTQ.out.paired ) ch_versions = ch_versions.mix( BWA_MEM.out.versions, SAMTOOLS_FILTERALIGNED.out.versions, - PICARD_SAMTOFASTQ.out.versions + PICARD_SAMTOFASTQ.out.versions, + CUSTOM_COUNTFASTQ.out.versions ) emit: - reads = PICARD_SAMTOFASTQ.out.reads // channel: [ val(meta), path(fastq) ] - versions = ch_versions // channel: [ path(versions.yml) ] + reads = PICARD_SAMTOFASTQ.out.paired // channel: [ val(meta), path(fastq) ] + n_surviving_reads = CUSTOM_COUNTFASTQ.out.count + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/CCBR/filter_blacklist/meta.yml b/subworkflows/CCBR/filter_blacklist/meta.yml index 7eeb732c..df1797d1 100644 --- a/subworkflows/CCBR/filter_blacklist/meta.yml +++ b/subworkflows/CCBR/filter_blacklist/meta.yml @@ -11,15 +11,19 @@ components: - bwa/mem - samtools/filteraligned - picard/samtofastq + - custom/countfastq input: - ch_fastq_input: + type: map description: | A channel containing fastq files - ch_blacklist_index: + type: file description: | A BWA index created by running BWA/INDEX on a fasta file of blacklisted regions/ output: - reads: + type: map description: | Reads from the fastq files that do not align to the blacklist - versions: diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 2e446ade..e0a0385c 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -1,7 +1,8 @@ include { BWA_MEM } from "../../modules/CCBR/bwa/mem" include { FILTER_QUALITY } from "../../modules/local/align.nf" -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' -include { 
SAMTOOLS_SORT } from '../../modules/CCBR/samtools/sort/main' // TODO use ccbr samtools/sort +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_ALIGN + SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_FILTER } from '../../modules/CCBR/samtools/flagstat' +include { SAMTOOLS_SORT } from '../../modules/CCBR/samtools/sort' workflow ALIGN_GENOME { @@ -11,17 +12,19 @@ workflow ALIGN_GENOME { main: BWA_MEM(reads, reference) + SAMTOOLS_FLAGSTAT_ALIGN( BWA_MEM.out.bam ) FILTER_QUALITY( BWA_MEM.out.bam ) SAMTOOLS_SORT( FILTER_QUALITY.out.bam ) - SAMTOOLS_FLAGSTAT( SAMTOOLS_SORT.out.bam ) + SAMTOOLS_FLAGSTAT_FILTER( SAMTOOLS_SORT.out.bam ) ch_versions = Channel.empty().mix( BWA_MEM.out.versions, - SAMTOOLS_FLAGSTAT.out.versions + SAMTOOLS_FLAGSTAT_ALIGN.out.versions ) emit: - bam = FILTER_QUALITY.out.bam - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat - versions = ch_versions + bam = FILTER_QUALITY.out.bam + aligned_flagstat = SAMTOOLS_FLAGSTAT_ALIGN.out.flagstat + filtered_flagstat = SAMTOOLS_FLAGSTAT_FILTER.out.flagstat + versions = ch_versions } diff --git a/subworkflows/local/deeptools.nf b/subworkflows/local/deeptools.nf new file mode 100644 index 00000000..da601911 --- /dev/null +++ b/subworkflows/local/deeptools.nf @@ -0,0 +1,62 @@ + +include { BAM_COVERAGE } from "../../modules/local/deeptools.nf" +include { BIGWIG_SUM } from "../../modules/local/deeptools.nf" +include { BED_PROTEIN_CODING } from "../../modules/local/deeptools.nf" +include { COMPUTE_MATRIX } from "../../modules/local/deeptools.nf" +include { PLOT_FINGERPRINT } from "../../modules/local/deeptools.nf" +include { PLOT_CORRELATION } from "../../modules/local/deeptools.nf" +include { PLOT_PCA } from "../../modules/local/deeptools.nf" +include { PLOT_HEATMAP } from "../../modules/local/deeptools.nf" +include { PLOT_PROFILE } from "../../modules/local/deeptools.nf" + +workflow DEEPTOOLS { + take: + deduped_bam + frag_lengths + effective_genome_size + gene_info + + main: + + 
deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE + BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list } + BIGWIG_SUM(bigwig_list) + BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION + BIGWIG_SUM.out.array | PLOT_PCA + + // Create channel: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] + deduped_bam + .combine(deduped_bam) + .map { + meta1, bam1, bai1, meta2, bam2, bai2 -> + meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null + } + .set { ch_ip_ctrl_bam_bai } + ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT + gene_info | BED_PROTEIN_CODING + COMPUTE_MATRIX(bigwig_list, + BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS')) + ) + PLOT_HEATMAP(COMPUTE_MATRIX.out.mat) + PLOT_PROFILE(COMPUTE_MATRIX.out.mat) + + // Create channel: [ meta, ip_bw, control_bw ] + BAM_COVERAGE.out.meta + .merge(BAM_COVERAGE.out.bigwig) + .set { bigwigs } + bigwigs + .combine(bigwigs) + .map { + meta1, bw1, meta2, bw2 -> + meta1.control == meta2.id ? 
[ meta1, bw1, bw2 ] : null + } + .set { ch_ip_ctrl_bigwig } + + emit: + bigwig = ch_ip_ctrl_bigwig + fingerprint_matrix = PLOT_FINGERPRINT.out.matrix + fingerprint_metrics = PLOT_FINGERPRINT.out.metrics + corr = PLOT_CORRELATION.out.tab + pca = PLOT_PCA.out.tab + profile = PLOT_PROFILE.out.tab +} diff --git a/subworkflows/local/qc.nf b/subworkflows/local/qc.nf index 2a377e35..75e6975c 100644 --- a/subworkflows/local/qc.nf +++ b/subworkflows/local/qc.nf @@ -1,4 +1,4 @@ - +// modules include { FASTQC as FASTQC_RAW } from "../../modules/local/qc.nf" include { FASTQC as FASTQC_TRIMMED } from "../../modules/local/qc.nf" include { FASTQ_SCREEN } from "../../modules/local/qc.nf" @@ -9,22 +9,17 @@ include { QC_STATS } from "../../modules/local/qc.nf" include { QC_TABLE } from "../../modules/local/qc.nf" include { MULTIQC } from "../../modules/local/qc.nf" -include { BAM_COVERAGE } from "../../modules/local/deeptools.nf" -include { BIGWIG_SUM } from "../../modules/local/deeptools.nf" -include { BED_PROTEIN_CODING } from "../../modules/local/deeptools.nf" -include { COMPUTE_MATRIX } from "../../modules/local/deeptools.nf" -include { PLOT_FINGERPRINT } from "../../modules/local/deeptools.nf" -include { PLOT_CORRELATION } from "../../modules/local/deeptools.nf" -include { PLOT_PCA } from "../../modules/local/deeptools.nf" -include { PLOT_HEATMAP } from "../../modules/local/deeptools.nf" -include { PLOT_PROFILE } from "../../modules/local/deeptools.nf" +// subworkflows +include { DEEPTOOLS } from "../../subworkflows/local/deeptools.nf" workflow QC { take: raw_fastqs trimmed_fastqs - aligned_bam + n_reads_surviving_blacklist + aligned_filtered_bam aligned_flagstat + filtered_flagstat deduped_bam deduped_flagstat ppqt_spp @@ -43,10 +38,10 @@ workflow QC { type: 'dir', checkIfExists: true)) | FASTQ_SCREEN ch_multiqc = ch_multiqc.mix(FASTQ_SCREEN.out.screen) } - PRESEQ(aligned_bam) + PRESEQ(aligned_filtered_bam) // when preseq fails, write NAs for the stats that are calculated from 
its log PRESEQ.out.log - .join(aligned_bam, remainder: true) + .join(aligned_filtered_bam, remainder: true) .branch { meta, preseq_log, bam_tuple -> failed: preseq_log == null return (tuple(meta, "nopresqlog")) @@ -59,67 +54,54 @@ workflow QC { .concat(HANDLE_PRESEQ_ERROR.out.nrf) .set{ preseq_nrf } - QC_STATS( - raw_fastqs, - aligned_flagstat, - deduped_flagstat, - preseq_nrf, - ppqt_spp, - frag_lengths - ) - QC_TABLE(QC_STATS.out.collect()) - - // Deeptools + qc_stats_input = raw_fastqs + .join(n_reads_surviving_blacklist) + .join(aligned_flagstat) + .join(filtered_flagstat) + .join(deduped_flagstat) + .join(preseq_nrf) + .join(ppqt_spp) + .join(frag_lengths) + QC_STATS( qc_stats_input ) + QC_TABLE( QC_STATS.out.collect() ) - deduped_bam.join(frag_lengths).combine(effective_genome_size) | BAM_COVERAGE - BAM_COVERAGE.out.bigwig.collect().set{ bigwig_list } - BIGWIG_SUM(bigwig_list) - BIGWIG_SUM.out.array.combine(Channel.from('heatmap', 'scatterplot')) | PLOT_CORRELATION - BIGWIG_SUM.out.array | PLOT_PCA - - // Create channel: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] - deduped_bam - .combine(deduped_bam) - .map { - meta1, bam1, bai1, meta2, bam2, bai2 -> - meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null + deduped_flagstat + .map { meta, flagstat, idxstat -> + [ flagstat, idxstat ] } - .set { ch_ip_ctrl_bam_bai } - ch_ip_ctrl_bam_bai | PLOT_FINGERPRINT - gene_info | BED_PROTEIN_CODING - COMPUTE_MATRIX(bigwig_list, - BED_PROTEIN_CODING.out.bed.combine(Channel.from('metagene','TSS')) - ) - PLOT_HEATMAP(COMPUTE_MATRIX.out.mat) - PLOT_PROFILE(COMPUTE_MATRIX.out.mat) - - // Create channel: [ meta, ip_bw, control_bw ] - BAM_COVERAGE.out.meta - .merge(BAM_COVERAGE.out.bigwig) - .set { bigwigs } - bigwigs - .combine(bigwigs) - .map { - meta1, bw1, meta2, bw2 -> - meta1.control == meta2.id ? 
[ meta1, bw1, bw2 ] : null + .set{ dedup_flagstat_files } + ppqt_spp + .map { meta, spp -> + [ spp ] } - .set { ch_ip_ctrl_bigwig } - + .set{ ppqt_spp_files } ch_multiqc = ch_multiqc.mix( FASTQC_RAW.out.zip, FASTQC_TRIMMED.out.zip, - deduped_flagstat, - ppqt_spp, - QC_TABLE.out, - PLOT_FINGERPRINT.out.matrix, - PLOT_FINGERPRINT.out.metrics, - PLOT_CORRELATION.out.tab, - PLOT_PCA.out.tab, - PLOT_PROFILE.out.tab + dedup_flagstat_files, + ppqt_spp_files, + QC_TABLE.out.txt ) + ch_ip_ctrl_bigwig = Channel.empty() + if (params.run.deeptools) { + DEEPTOOLS( deduped_bam, + frag_lengths, + effective_genome_size, + gene_info + ) + ch_ip_ctrl_bigwig = DEEPTOOLS.out.bigwig + ch_multiqc = ch_multiqc.mix( + DEEPTOOLS.out.fingerprint_matrix, + DEEPTOOLS.out.fingerprint_metrics, + DEEPTOOLS.out.corr, + DEEPTOOLS.out.pca, + DEEPTOOLS.out.profile + ) + } + emit: - bigwigs = ch_ip_ctrl_bigwig + bigwigs = ch_ip_ctrl_bigwig multiqc_input = ch_multiqc } diff --git a/tests/.gitignore b/tests/.gitignore deleted file mode 100644 index 29db90be..00000000 --- a/tests/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -assets/ -conf/ -results/ -work/ - -nextflow.config -submit_slurm.sh diff --git a/tests/cli/.gitignore b/tests/cli/.gitignore new file mode 100644 index 00000000..810a465d --- /dev/null +++ b/tests/cli/.gitignore @@ -0,0 +1,4 @@ +/assets/ +/conf/ +/nextflow.config +/submit_slurm.sh diff --git a/conf/ci_stub.config b/tests/cli/ci_stub.config similarity index 60% rename from conf/ci_stub.config rename to tests/cli/ci_stub.config index 2ba83f49..317a28aa 100644 --- a/conf/ci_stub.config +++ b/tests/cli/ci_stub.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = 
"${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'test' max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources @@ -13,19 +13,19 @@ params { publish_dir_mode = "symlink" // CCBR shared resource paths - index_dir = "data" + index_dir = "../data" fastq_screen { - conf = "assets/fastq_screen_ci.conf" - db_dir = "data/fastq_screen_db" + conf = "fastq_screen_ci.conf" + db_dir = "../data/fastq_screen_db" } genomes { 'test' { // blank files for testing stubs on GitHub Actions - blacklist_index = "data/test.blacklist" - reference_index = "data/test/*" + blacklist_index = "${params.index_dir}/test.blacklist" + reference_index = "${params.index_dir}/test/*" effective_genome_size = 2700000000 - chrom_sizes = "data/test.fa.sizes" - gene_info = "data/geneinfo.bed" - chromosomes_dir = "data/chroms/" + chrom_sizes = "${params.index_dir}/test.fa.sizes" + gene_info = "${params.index_dir}/geneinfo.bed" + chromosomes_dir = "${params.index_dir}/chroms/" } } sicer { diff --git a/conf/ci_test.config b/tests/cli/ci_test.config similarity index 80% rename from conf/ci_test.config rename to tests/cli/ci_test.config index f262c176..df6d9378 100644 --- a/conf/ci_test.config +++ b/tests/cli/ci_test.config @@ -3,7 +3,7 @@ params { config_profile_description = 'Minimal test dataset with blank references to run stubs with continuous integration to run from the tests/ dir' outdir = 'results/test' - input = 'assets/samplesheet_test.csv' // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv + input = "${projectDir}/assets/samplesheet_test.csv" // adapted from https://github.com/nf-core/test-datasets/blob/chipseq/samplesheet/v2.0/samplesheet_test.csv genome = 'custom_genome' read_length = 50 @@ -11,8 +11,8 @@ params { // Genome 
references genome_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' genes_gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' - blacklist = 'data/test.blacklist' - rename_contigs = 'assets/R64-1-1_ensembl2UCSC.txt' + blacklist = '../data/test.blacklist' + rename_contigs = "${projectDir}/assets/R64-1-1_ensembl2UCSC.txt" max_cpus = 2 // for GitHub Actions https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources @@ -22,7 +22,7 @@ params { publish_dir_mode = "symlink" // CCBR shared resource paths - index_dir = "data" + index_dir = "../data" fastq_screen = null sicer.species = "sacCer1" // supported species https://github.com/zanglab/SICER2/blob/master/sicer/lib/GenomeData.py diff --git a/assets/fastq_screen_ci.conf b/tests/cli/fastq_screen_ci.conf similarity index 100% rename from assets/fastq_screen_ci.conf rename to tests/cli/fastq_screen_ci.conf