From cf05634a240fb4cc5dd41a09e26e0312487afcf7 Mon Sep 17 00:00:00 2001 From: slsevilla Date: Thu, 30 Mar 2023 20:04:05 -0400 Subject: [PATCH] bug fix for contrasts set to N --- workflow/Snakefile | 43 ++++++----- workflow/rules/annotations.smk | 134 ++++++++++++++++----------------- 2 files changed, 92 insertions(+), 85 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 9a472dd..3e410c4 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -143,22 +143,23 @@ def get_rose(wildcards): def get_enrichment(wildcards): files=[] - if (GENOME == "hg19") or (GENOME == "hg38"): - if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): - t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(t) - h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(h) - if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): - t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(t) - h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(h) - if ("seacr_norm_stringent" in PEAKTYPE) or ("seacr_norm_relaxed" in PEAKTYPE) or ("seacr_non_stringent" in PEAKTYPE) or ("seacr_non_relaxed" in PEAKTYPE): - t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="seacr",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(t) - h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="seacr",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) - files.extend(h) + if config["run_contrasts"] == "Y": + if (GENOME == "hg19") or (GENOME == "hg38"): + if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): + t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(t) + h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(h) + if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): + t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(t) + h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(h) + if ("seacr_norm_stringent" in PEAKTYPE) or ("seacr_norm_relaxed" in PEAKTYPE) or ("seacr_non_stringent" in PEAKTYPE) or ("seacr_non_relaxed" in PEAKTYPE): + t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="seacr",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(t) + h=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),peak_caller="seacr",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) + files.extend(h) return files include: "rules/init.smk" @@ -173,27 +174,33 @@ rule all: ########################################## ### required files ########################################## + ## Rules/Init # manifests unpack(run_pipe_prep), + ## Rules/Align # norm table, if needed unpack(run_library_norm), - + # alignment stats yaml files and stats table expand(join(RESULTSDIR,"alignment_stats","{replicate}.alignment_stats.yaml"),replicate=REPLICATES), join(RESULTSDIR,"alignment_stats","alignment_stats.tsv"), - # # PEAKCALLS rules + ## Rules/peakcalls + # PEAKCALLS rules unpack(run_macs2), unpack(run_seacr), unpack(run_gopeaks), + ## Rules/QC # qc unpack(run_qc), + ## Rules/diff # DIFFERENTIAL unpack(run_contrasts), + ## Rules/annotation # ANNOTATION unpack(get_motifs), unpack(get_rose), diff --git a/workflow/rules/annotations.smk b/workflow/rules/annotations.smk index db9e01c..d29db98 100644 --- a/workflow/rules/annotations.smk +++ b/workflow/rules/annotations.smk @@ -259,78 +259,78 @@ rule rose: echo "Less than 5 usable peaks detected (N=${{num_of_peaks}})" > {output.super_summit} fi """ - -rule create_contrast_peakcaller_files: - """ - Reads in all of the output from Rules create_contrast_data_files which match the same peaktype and merges them together - """ - input: - contrast_files=expand(join(RESULTSDIR,"peaks","{qthresholds}","contrasts","{contrast_list}.{dupstatus}","{contrast_list}.{dupstatus}.{peak_caller_type}.txt"),qthresholds=QTRESHOLDS, contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE) - params: - qthresholds = "{qthresholds}", - contrast_list = "{contrast_list}", - dupstatus = "{dupstatus}", - peak_caller = "{peak_caller}", - search_dir=join(RESULTSDIR,"peaks","{qthresholds}","contrasts") - output: - peak_contrast_files=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt") - shell: +if config["run_contrasts"] == "Y": + rule create_contrast_peakcaller_files: """ - set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then - TMPDIR="/lscratch/$SLURM_JOB_ID" - else - TMPDIR="/dev/shm" - fi + Reads in all of the output from Rules create_contrast_data_files which match the same peaktype and merges them together + """ + input: + contrast_files=expand(join(RESULTSDIR,"peaks","{qthresholds}","contrasts","{contrast_list}.{dupstatus}","{contrast_list}.{dupstatus}.{peak_caller_type}.txt"),qthresholds=QTRESHOLDS, contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE) + params: + qthresholds = "{qthresholds}", + contrast_list = "{contrast_list}", + dupstatus = "{dupstatus}", + peak_caller = "{peak_caller}", + search_dir=join(RESULTSDIR,"peaks","{qthresholds}","contrasts") + output: + peak_contrast_files=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt") + shell: + """ + set -exo pipefail + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + TMPDIR="/lscratch/$SLURM_JOB_ID" + else + TMPDIR="/dev/shm" + fi - # for each of the file, find matches to the peak_type - if [[ -f $$TMPDIR/merge.txt ]]; then rm $TMPDIR/merge.txt; fi + # for each of the file, find matches to the peak_type + if [[ -f $$TMPDIR/merge.txt ]]; then rm $TMPDIR/merge.txt; fi - # pull file list - # /data/sevillas2/carlisle/v2.0/results/peaks/0.05/contrasts/53_H3K4me3_vs_HN6_H3K4me3.dedup/53_H3K4me3_vs_HN6_H3K4me3.dedup.seacr_norm_stringent.txt - for f in {params.search_dir}/{params.contrast_list}.{params.dupstatus}/{params.contrast_list}.{params.dupstatus}.{params.peak_caller}*.txt; do - cat $f >> $TMPDIR/merge.txt - done + # pull file list + # /data/sevillas2/carlisle/v2.0/results/peaks/0.05/contrasts/53_H3K4me3_vs_HN6_H3K4me3.dedup/53_H3K4me3_vs_HN6_H3K4me3.dedup.seacr_norm_stringent.txt + for f in {params.search_dir}/{params.contrast_list}.{params.dupstatus}/{params.contrast_list}.{params.dupstatus}.{params.peak_caller}*.txt; do + cat $f >> $TMPDIR/merge.txt + done - # save to output - cat $TMPDIR/merge.txt | sort | uniq > {output.peak_contrast_files} - rm $TMPDIR/merge.txt - """ + # save to output + cat $TMPDIR/merge.txt | sort | uniq > {output.peak_contrast_files} + rm $TMPDIR/merge.txt + """ -rule go_enrichment: - """ - https://bioconductor.org/packages/devel/bioc/vignettes/chipenrich/inst/doc/chipenrich-vignette.html#peak-distance-to-tss-distribution - """ - input: - contrast_file=rules.create_contrast_peakcaller_files.output.peak_contrast_files - output: - html=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"), - params: - rscript_wrapper=join(SCRIPTSDIR,"_go_enrichment_wrapper.R"), - rmd=join(SCRIPTSDIR,"_go_enrichment.Rmd"), - rscript_functions=join(SCRIPTSDIR,"_carlisle_functions.R"), - output_dir = join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment"), - species = config["genome"], - geneset_id = GENESET_ID, - dedup_status = "{dupstatus}" - envmodules: - TOOLS["R"] - shell: + rule go_enrichment: """ - set -exo pipefail + https://bioconductor.org/packages/devel/bioc/vignettes/chipenrich/inst/doc/chipenrich-vignette.html#peak-distance-to-tss-distribution + """ + input: + contrast_file=rules.create_contrast_peakcaller_files.output.peak_contrast_files + output: + html=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"), + params: + rscript_wrapper=join(SCRIPTSDIR,"_go_enrichment_wrapper.R"), + rmd=join(SCRIPTSDIR,"_go_enrichment.Rmd"), + rscript_functions=join(SCRIPTSDIR,"_carlisle_functions.R"), + output_dir = join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment"), + species = config["genome"], + geneset_id = GENESET_ID, + dedup_status = "{dupstatus}" + envmodules: + TOOLS["R"] + shell: + """ + set -exo pipefail - # get sample list - sample_list=`awk '{{print $3}}' {input.contrast_file}` - clean_sample_list=`echo $sample_list | sed "s/\s/xxx/g"` + # get sample list + sample_list=`awk '{{print $3}}' {input.contrast_file}` + clean_sample_list=`echo $sample_list | sed "s/\s/xxx/g"` - # rum script - Rscript {params.rscript_wrapper} \\ - --rmd {params.rmd} \\ - --sourcefile {params.rscript_functions} \\ - --output_dir {params.output_dir} \\ - --report {output.html} \\ - --peak_list "$clean_sample_list" \\ - --species {params.species} \\ - --geneset_id {params.geneset_id} \\ - --dedup_status {params.dedup_status} - """ + # rum script + Rscript {params.rscript_wrapper} \\ + --rmd {params.rmd} \\ + --sourcefile {params.rscript_functions} \\ + --output_dir {params.output_dir} \\ + --report {output.html} \\ + --peak_list "$clean_sample_list" \\ + --species {params.species} \\ + --geneset_id {params.geneset_id} \\ + --dedup_status {params.dedup_status} + """