Skip to content

Commit

Permalink
Merge pull request #43 from CCBR/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
slsevilla authored Feb 17, 2023
2 parents b018de9 + f469c97 commit 687058f
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 80 deletions.
11 changes: 7 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ macs2_control: "N"
### default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md
### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md
quality_thresholds: "0.1, 0.05, 0.01"
## MACS2, broad-peaks specific, quality threshold
### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used
macs2_broad_peak_threshold: "0.01"

# annotations
## rose parameters
Expand All @@ -93,23 +96,23 @@ preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir"
reference:
hg38:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38.fa"
gtf: "tbd"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/genes.gtf"
blacklist: "PIPELINE_HOME/resources/blacklistbed/hg38.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"
macs2_g: "hs"
tss_bed: "PIPELINE_HOME/resources/tss_bed/hg38.tss.bed"
rose: "WORKDIR/annotation/hg38_refseq.ucsc"
hg19:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa"
gtf: "tbd"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf"
blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"
macs2_g: "hs"
tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed"
rose: "WORKDIR/annotation/hg19_refseq.ucsc"
mm10:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa"
gtf: "tbd"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf"
blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY"
macs2_g: "mm"
Expand All @@ -132,4 +135,4 @@ spikein_reference:
saccharomyces:
fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna"

adapters: "PIPELINE_HOME/resources/other/adapters.fa"
adapters: "PIPELINE_HOME/resources/other/adapters.fa"
16 changes: 11 additions & 5 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,21 @@ def run_seacr(wildcards):
files.extend(s)
s2=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.norm.stringent.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(s2)
s3=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.stringent.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(s3)
if "non.stringent.bed" in PEAKTYPE:
s=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.stringent.bed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(s)
s2=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.stringent.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(s2)
if "norm.relaxed.bed" in PEAKTYPE:
r=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.norm.relaxed.bed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(r)
r2=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.norm.relaxed.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(r2)
r3=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.relaxed.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(r3)
if "non.relaxed.bed" in PEAKTYPE:
r=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.relaxed.bed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(r)
r2=expand(join(RESULTSDIR,"peaks","{qthresholds}","seacr","{treatment}_vs_{control}","{treatment}_vs_{control}.{dupstatus}.non.relaxed.bigbed"),qthresholds=QTRESHOLDS,treatment=TREATMENTS,control=CONTROLS,dupstatus=DUPSTATUS),
files.extend(r2)
return files

def run_gopeaks(wildcards):
Expand Down Expand Up @@ -92,7 +98,7 @@ def get_annotation_macs2(wildcards):

def get_annotation_s_and_g(wildcards):
files=[]
if ("narrowGo_peaks.bed" in PEAKTYPE) or ("broadGo_peaks.bed" in PEAKTYPE) or ("norm.stringent.bed" in PEAKTYPE) or ("norm.relaxed.bed" in PEAKTYPE):
if ("narrowGo_peaks.bed" in PEAKTYPE) or ("broadGo_peaks.bed" in PEAKTYPE) or ("norm.stringent.bed" in PEAKTYPE) or ("norm.relaxed.bed" in PEAKTYPE) or ("non.stringent.bed" in PEAKTYPE) or ("non.relaxed.bed" in PEAKTYPE):
f=expand(join(RESULTSDIR,"annotation","{qthresholds}","{s_and_g_types}","{t_and_c}","{t_and_c}.{dupstatus}.annotation.txt"),qthresholds=QTRESHOLDS,s_and_g_types=S_AND_G_TYPES,t_and_c=TREATMENT_CONTROL_LIST,dupstatus=DUPSTATUS)
files.extend(f)
return files
Expand Down
3 changes: 2 additions & 1 deletion workflow/rules/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,11 @@ rule bam2bg:
if [[ "{params.spikein}" == "" ]];then
spikein_scale=1
elif [[ "{params.spikein}" == "LIBRARY" ]];then
library_size=`cat {params.library_file} | grep {params.replicate} | cut -f2 -d","`
library_size=`cat {params.library_file} | grep {params.replicate} | cut -f2 -d"," | head -n1`
if [[ $library_size =~ "Inf" ]]; then
spikein_scale=`head -n1 {params.library_file} | awk -F"," \'{{print $2}}\'`
spikein_scale=$(echo "$spikein_scale / 1000000" | bc -l)
else
spikein_scale=$(echo "$library_size / 1000000" | bc -l)
fi
Expand Down
20 changes: 18 additions & 2 deletions workflow/rules/annotations.smk
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,17 @@ rule peakAnnotation_macs2:
"""

def get_bed_s_and_g(wildcards):
# SEACR OPTIONS
if wildcards.s_and_g_types =="norm.stringent.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.t_and_c, wildcards.t_and_c + "." + wildcards.dupstatus + ".norm.stringent.bed")
if wildcards.s_and_g_types =="norm.relaxed.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.t_and_c, wildcards.t_and_c + "." + wildcards.dupstatus + ".norm.relaxed.bed")
if wildcards.s_and_g_types =="non.stringent.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.t_and_c, wildcards.t_and_c + "." + wildcards.dupstatus + ".non.stringent.bed")
if wildcards.s_and_g_types =="non.relaxed.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.t_and_c, wildcards.t_and_c + "." + wildcards.dupstatus + ".non.relaxed.bed")

#GOPEAKS OPTIONS
if wildcards.s_and_g_types =="narrowGo_peaks.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"gopeaks", wildcards.t_and_c + "." + wildcards.dupstatus + ".narrowGo_peaks.bed")
if wildcards.s_and_g_types =="broadGo_peaks.bed":
Expand Down Expand Up @@ -71,14 +78,23 @@ def get_cntrl_bam(wildcards):

def get_bed_all(wildcards):
cntrl=TREAT_to_CONTRL_DICT[wildcards.treatment]
#MACS2 OPTIONS
if wildcards.peak_types == "narrowPeak":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"macs2", wildcards.treatment, wildcards.treatment + "." + wildcards.dupstatus + "_peaks.narrowPeak")
if wildcards.peak_types =="broadPeak":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"macs2", wildcards.treatment, wildcards.treatment + "." + wildcards.dupstatus + "_peaks.broadPeak")

#SEACR OPTIONS
if wildcards.peak_types =="norm.stringent.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.treatment + "_vs_" + cntrl, wildcards.treatment + "_vs_" + cntrl + "." + wildcards.dupstatus + ".norm.stringent.bed")
if wildcards.peak_types =="norm.relaxed.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.treatment + "_vs_" + cntrl, wildcards.treatment + "_vs_" + cntrl + "." + wildcards.dupstatus + ".norm.relaxed.bed")
if wildcards.peak_types =="non.stringent.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.treatment + "_vs_" + cntrl, wildcards.treatment + "_vs_" + cntrl + "." + wildcards.dupstatus + ".non.stringent.bed")
if wildcards.peak_types =="non.relaxed.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"seacr", wildcards.treatment + "_vs_" + cntrl, wildcards.treatment + "_vs_" + cntrl + "." + wildcards.dupstatus + ".non.relaxed.bed")

#GOPEAKS OPTIONS
if wildcards.peak_types =="narrowGo_peaks.bed":
bed=join(RESULTSDIR,"peaks",wildcards.qthresholds,"gopeaks", wildcards.treatment + "_vs_" + cntrl + "." + wildcards.dupstatus + ".narrowGo_peaks.bed")
if wildcards.peak_types =="broadGo_peaks.bed":
Expand Down Expand Up @@ -184,10 +200,10 @@ rule rose:
nl --number-format=rz --number-width=3 $TMPDIR/subset.bed | awk -v sample_id="{params.sampleID}_" \'{{print sample_id$1"\\t0\\t."}}\' > $TMPDIR/col.txt
paste -d "\t" $TMPDIR/save.bed $TMPDIR/col.txt > $TMPDIR/subset.bed
fi
## correct GOPEAKS
## correct SEACR
### original: <chr> <start> <end> <total signal> <max signal> <max signal region>
### output: <chr> <start> <end> <$sampleid_uniquenumber> <total signal> <.>
if [[ {params.peak_type} == "norm.stringent.bed" ]] || [[ {params.peak_type} == "norm.relaxed.bed" ]]; then
if [[ {params.peak_type} == "norm.stringent.bed" ]] || [[ {params.peak_type} == "norm.relaxed.bed" ]] || [[ {params.peak_type} == "non.relaxed.bed" ]] || [[ {params.peak_type} == "non.relaxed.bed" ]]; then
cp $TMPDIR/subset.bed $TMPDIR/save.bed
awk -v sample_id="{params.sampleID}_" \'{{print $1"\\t"$2"\\t"$3"\\t"sample_id$1"\\t"$4"\\t."}}\' $TMPDIR/subset.bed > $TMPDIR/col.txt
paste -d "\t" $TMPDIR/save.bed $TMPDIR/col.txt > $TMPDIR/subset.bed
Expand Down
Loading

0 comments on commit 687058f

Please sign in to comment.