Skip to content

Commit

Permalink
Merge pull request #125 from CCBR/fix-qc-stats
Browse files Browse the repository at this point in the history
fix: QC stats sample IDs; add read counts between steps
  • Loading branch information
kelly-sovacool authored Nov 2, 2023
2 parents 8302666 + 263f9e4 commit 637471f
Show file tree
Hide file tree
Showing 45 changed files with 396 additions and 370 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,16 @@ jobs:
pip install .[dev,test]
- name: Stub run
run: |
cd tests/
cd tests/cli
which champagne
champagne init
champagne run -profile ci_stub -stub
champagne run -stub -c ci_stub.config --max_cpus 2 --max_memory 6.GB
- name: Test run
if: ${{ env.test_run == 'true' }}
run: |
cd tests/
cd tests/cli
champagne init
champagne run -profile ci_test,docker
champagne run -profile docker -c ci_test.config
- name: "Upload Artifact"
uses: actions/upload-artifact@v3
if: always() # run even if previous steps fail
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ replay_pid*
/work*/
/data/
/results/
/output/
/params.yaml

# python packaging
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## development version

- Fixed a bug in QC stats that mixed up the statistics for different samples (#125).
- Fixed a bug in the CLI that added the `-profile` to the nextflow command even if it wasn't needed (#125).
- Report read counts between blacklist & filtering steps in the QC table (#125).
- Run spooker on workflow completion (#126).

## CHAMPAGNE 0.2.0
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.2.0-dev
0.2.1
14 changes: 10 additions & 4 deletions assets/multiqc_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,19 @@ custom_data:
NReads:
description: "The number of reads sequenced"
format: "{:,.0f}"
hidden: True
N_reads_surviving_blacklist:
description: "The number of reads surviving after filtering blacklisted regions"
format: "{:,.0f}"
hidden: true
NMappedReads:
description: "The number of reads mapped"
format: "{:,.0f}"
hidden: True
N_mapped_reads_surviving_filter:
description: "The number of mapped reads surviving after filtering by alignment quality"
format: "{:,.0f}"
hidden: true
NUniqMappedReads:
description: "The number of reads remaining after deduplication"
description: "The number of mapped & filtered reads remaining after deduplication"
format: "{:,.0f}"
NRF:
description: "Non-Redundant fraction"
Expand Down Expand Up @@ -163,7 +169,7 @@ custom_data:

sp:
QC_Table:
fn: "qc_table.txt"
fn: "qc_table.tsv"
NGSQC_data:
fn: "*NGSQC.txt"
frip_samples:
Expand Down
15 changes: 15 additions & 0 deletions bin/compare-tables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Dev script: compare QC read-count columns between the legacy QCTable.txt
# and the new qc_table.tsv, reporting the percent difference per
# sample/metric pair. Intended for interactive use (ends in View()).
library(tidyverse)

# Legacy table is whitespace-delimited with a header row; keep only the
# sample ID plus the read-count columns, as long (sample, metric, value) rows.
original <- read.table("QCTable.txt", header = TRUE) %>%
  as_tibble() %>%
  mutate(across(contains("reads"), as.integer)) %>%
  select(c("SampleName", contains("reads"))) %>%
  pivot_longer(-SampleName, names_to = "name", values_to = "value_orig")

# New table: keep only the metrics present in the legacy table so the two
# long formats line up one-to-one. all_of() + unique() avoids the deprecated
# (and here duplicated) bare external vector in select().
new <- read_tsv("qc_table.tsv") %>%
  select(SampleName, all_of(unique(pull(original, name)))) %>%
  pivot_longer(-SampleName, values_to = "value_new")

# Explicit join keys avoid relying on inner_join()'s natural-join guess.
inner_join(original, new, by = c("SampleName", "name")) %>%
  mutate(rel_diff_percent = round(100 * (value_new - value_orig) / value_orig, 2)) %>%
  View() # interactive sessions only; may fail under Rscript without a viewer
25 changes: 25 additions & 0 deletions bin/count-peaks.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Dev script: compare per-sample/per-tool peak counts between the new
# peak_meta.tsv and the legacy old_peak_counts.tsv, reporting the percent
# change. Intended for interactive use (ends in View()).
library(tidyverse)

# New counts: one row per (sample, peak-calling tool).
# count(..., name =) replaces the group_by %>% count %>% rename chain.
peak_counts <- read_tsv("peak_meta.tsv") %>%
  count(sample_id, tool, name = "count_new")

# Show which tools are represented (exploratory console output).
peak_counts %>%
  pull(tool) %>%
  unique() %>%
  print()

# Legacy counts: the `file` column looks like "<tool>/<sample_id>/<file>";
# derive tool and sample from it and harmonize old tool names with new ones.
peaks_old <- read_tsv("old_peak_counts.tsv") %>%
  mutate(
    tool = str_remove(file, "/.*"),
    tool = case_when(
      tool == "macsBroad" ~ "macs_broad",
      tool == "macsNarrow" ~ "macs_narrow",
      TRUE ~ tool
    ),
    sample_id = str_replace(file, ".*/(.*)/.*", "\\1")
  ) %>%
  rename(count_old = count) %>%
  select(sample_id, tool, count_old)

# Explicit join keys avoid inner_join()'s natural-join message.
inner_join(peaks_old, peak_counts, by = c("sample_id", "tool")) %>%
  mutate(rel_diff_percent = round(100 * (count_new - count_old) / count_old, 2)) %>%
  View() # interactive sessions only
11 changes: 5 additions & 6 deletions bin/createtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ def file2table():
df = pd.DataFrame(tabledict)
df.index.name = "SampleName"
df.reset_index(inplace=True)
# print(df[['NSC', 'FRiP', 'PCB1', 'PCB2', 'RSC']]) #re-order columns
# cols = df.columns.tolist() # view df columns names
# orderedcols = ordercolumns(cols)
# print(df.to_string())
df = df.sort_values(by="SampleName")

# sometimes preseq fails, resulting in some columns not being present.
# so this only keeps columns that exist in the dict.
Expand All @@ -46,7 +43,9 @@ def file2table():
for col in [
"SampleName",
"NReads",
"N_reads_surviving_blacklist",
"NMappedReads",
"N_mapped_reads_surviving_filter",
"NUniqMappedReads",
"NRF",
"PBC1",
Expand All @@ -58,8 +57,8 @@ def file2table():
]
if col in df_columns
]

print(df[column_order].to_string(index=False, justify="left"))
df = df[column_order]
df.to_csv("qc_table.tsv", sep="\t", index=False)


if __name__ == "__main__":
Expand Down
2 changes: 2 additions & 0 deletions bin/filterMetrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def getmetadata(type):
metadata = "NReads"
elif type == "mnreads":
metadata = "NMappedReads"
elif type == "N_mapped_reads_surviving_filter":
metadata = type
elif type == "unreads":
metadata = "NUniqMappedReads"
elif type == "fragLen":
Expand Down
22 changes: 5 additions & 17 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

process {

// TODO nf-core: Check the defaults for all processes
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
Expand All @@ -32,19 +31,19 @@ process {
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
cpus = { check_max( 32 * task.attempt, 'cpus' ) }
memory = { check_max( 120.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
Expand All @@ -59,15 +58,4 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
/*
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}*/

// Custom CCBR resource requirements
withLabel:process_higher {
cpus = { check_max( 32 * task.attempt, 'cpus' ) }
memory = { check_max( 120.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
}
21 changes: 11 additions & 10 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,21 @@ process {

errorStrategy = 'finish'

withName: 'INPUT_CHECK:SAMPLESHEET_CHECK' {
/*
withName: '.*CUSTOM_DUMPSOFTWAREVERSIONS' {
cache = false
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
pattern: '*_versions.yml'
]
}*/

withName: '.*INPUT_CHECK:SAMPLESHEET_CHECK' {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'CUTADAPT' {
ext.args = [
'--nextseq-trim=2',
'--trim-n -n 5 -O 5',
'-q 10,10',
'-m 20',
'-b file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa'
].join(' ').trim()
}
}
1 change: 1 addition & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ params {
deeptools.excluded_chroms = 'chrM'
run {
qc = true
deeptools = true
normalize_input = true
call_peaks = true
gem = true
Expand Down
8 changes: 6 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ workflow MAKE_REFERENCE {

// MAIN WORKFLOW
workflow {
CHIPSEQ()
}

workflow CHIPSEQ {
INPUT_CHECK(file(params.input), params.seq_center)
INPUT_CHECK.out.reads.set { raw_fastqs }
raw_fastqs | CUTADAPT
Expand All @@ -72,8 +76,8 @@ workflow {

ch_multiqc = Channel.of()
if (params.run.qc) {
QC(raw_fastqs, trimmed_fastqs,
aligned_bam, ALIGN_GENOME.out.flagstat,
QC(raw_fastqs, trimmed_fastqs, FILTER_BLACKLIST.out.n_surviving_reads,
aligned_bam, ALIGN_GENOME.out.aligned_flagstat, ALIGN_GENOME.out.filtered_flagstat,
deduped_bam, DEDUPLICATE.out.flagstat,
PHANTOM_PEAKS.out.spp, frag_lengths,
PREPARE_GENOME.out.gene_info,
Expand Down
32 changes: 16 additions & 16 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,13 @@
},
"bwa/mem": {
"branch": "main",
"git_sha": "ca4f84b4c2ca84eb0449b4ba414a8b8052f8d90a",
"installed_by": ["filter_blacklist", "modules"]
"git_sha": "7887b0e0dc5a0320d8ba84c2763ef8692c358087",
"installed_by": ["modules", "filter_blacklist"]
},
"custom/countfastq": {
"branch": "main",
"git_sha": "2ccd43e3734de30fe61ed0ff80e6e3252929505e",
"installed_by": ["filter_blacklist"]
},
"cutadapt": {
"branch": "main",
Expand All @@ -27,17 +32,22 @@
},
"picard/samtofastq": {
"branch": "main",
"git_sha": "258d0f336ea1f851ab4223d295bb18b6dc187899",
"git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a",
"installed_by": ["filter_blacklist"]
},
"samtools/filteraligned": {
"branch": "main",
"git_sha": "879e969c593ab9f321301ac15722728ab30cea49",
"installed_by": ["filter_blacklist"]
},
"samtools/flagstat": {
"branch": "main",
"git_sha": "25e6e67a4ec172db1bbb0ef995c4a470d847143a",
"installed_by": ["modules"]
},
"samtools/sort": {
"branch": "main",
"git_sha": "d55ab2580b69a81aa0534a3018cc6e6ea3b28640",
"git_sha": "5b39869abfc740c6243d18a3cd84aa7d78787125",
"installed_by": ["modules"]
}
}
Expand All @@ -46,7 +56,7 @@
"CCBR": {
"filter_blacklist": {
"branch": "main",
"git_sha": "bb7dbb42afe47d7e02b2f21e3352720ca2996e11",
"git_sha": "b7764378fac18bea8c84f9dd39cb595241b6e796",
"installed_by": ["subworkflows"]
}
}
Expand All @@ -57,17 +67,7 @@
"nf-core": {
"bedtools/getfasta": {
"branch": "master",
"git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
"installed_by": ["modules"]
},
"bwa/index": {
"branch": "master",
"git_sha": "28a23ea6529caff44855c774f439a4074883027c",
"installed_by": ["modules"]
},
"samtools/flagstat": {
"branch": "master",
"git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
}
}
Expand Down
2 changes: 1 addition & 1 deletion modules/CCBR/bwa/mem/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions modules/CCBR/custom/countfastq/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 637471f

Please sign in to comment.