Skip to content

Commit

Permalink
Merge pull request #76 from CCBR/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
slsevilla authored Apr 21, 2023
2 parents c85c15a + 37512f0 commit 59d65e7
Show file tree
Hide file tree
Showing 16 changed files with 576 additions and 266 deletions.
24 changes: 11 additions & 13 deletions .github/workflows/build_mkdocs.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
# Workflow "mkdocs_build": runs on manual dispatch (workflow_dispatch) or on a
# push to master that touches files under docs/.
# NOTE(review): this span comes from a diff view and appears to contain BOTH the
# removed job (`build`, using mhausenblas/mkdocs-deploy-gh-pages) and the added
# job (`deploy`, using `mkdocs gh-deploy`) - confirm against the actual file.
name: mkdocs_build
on:
workflow_dispatch:
push:
branches:
- master
paths:
- 'docs/**'
jobs:
build:
name: Deploy docs
deploy:
runs-on: ubuntu-latest
steps:
# Action-based deployment: check out the repo, then hand mkdocs.yml and
# docs/requirements.txt to the mkdocs-deploy-gh-pages action.
- name: Checkout main
uses: actions/checkout@v2
- name: Deploy docs
uses: mhausenblas/mkdocs-deploy-gh-pages@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CONFIG_FILE: mkdocs.yml
EXTRA_PACKAGES: build-base
REQUIREMENTS: docs/requirements.txt
# Command-based deployment: check out the repo, set up Python, install the docs
# requirements, then publish with `mkdocs gh-deploy --force`.
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
# NOTE(review): unquoted version; a value such as 3.10 would be parsed as the
# float 3.1, so quote it if the version is ever bumped.
python-version: 3.9
- run: pip install --upgrade pip
- run: pip install -r docs/requirements.txt
- run: mkdocs gh-deploy --force
25 changes: 25 additions & 0 deletions .github/workflows/lintr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Lints the Snakemake workflow on every push to master or dev.
name: lintr
on:
  push:
    branches: [master, dev]
jobs:
  Lintr:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      # Runs the snakemake image once as a container step before the lint step.
      - uses: docker://snakemake/snakemake:v7.19.1
      # The lint step never fails the job: it continues on error and the script
      # itself falls back to an informational echo if `snakemake --lint` fails.
      # Inside the container (repo mounted at /opt2) the script creates the
      # output_carlisle config/annotation folders, copies the scripts and the
      # biowulf cluster/tools YAMLs, touches empty reference files, and runs
      # `snakemake --lint` with .test/config_lint.yaml.
      - name: Lint Workflow
        continue-on-error: true
        run: |
          docker run -v $PWD:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \
          "mkdir -p /opt2/output_carlisle/config /opt2/output_carlisle/annotation && \
          cp -r /opt2/workflow/scripts/ /opt2/output_carlisle/ && \
          cp /opt2/resources/cluster_biowulf.yaml /opt2/output_carlisle/config/cluster.yaml && \
          cp /opt2/resources/tools_biowulf.yaml /opt2/output_carlisle/config/tools.yaml && \
          cd /opt2/output_carlisle/annotation && \
          touch hg38.fa genes.gtf hg38.bed hg38.tss.bed hg38_refseq.ucsc Ecoli_GCF_000005845.2_ASM584v2_genomic.fna adapters.fa && \
          snakemake --lint -s /opt2/workflow/Snakefile \
          -d /opt2/output_carlisle --configfile /opt2/.test/config_lint.yaml || \
          echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'"
16 changes: 16 additions & 0 deletions .github/workflows/test_dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Runs Snakemake (via snakemake-github-action) against the '.test' directory
# on every push to the dev branch.
name: DevTesting
on:
  push:
    branches:
      - dev
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      # The repository must be checked out first; without this step '.test' and
      # 'workflow/Snakefile' do not exist in the runner workspace.
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Dev Testing Workflow
        uses: snakemake/snakemake-github-action@v1
        with:
          directory: '.test'
          snakefile: 'workflow/Snakefile'
          args: '--cores 1 --use-conda --conda-cleanup-pkgs cache'
          stagein: ''  # additional preliminary commands to run (can be multiline)
147 changes: 147 additions & 0 deletions .test/config_lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#####################################################################################
# Folders / Paths
#####################################################################################
# The working dir... output will be in the results subfolder of the /opt2/output_carlisle
workdir: "/opt2/output_carlisle"

# tab delimited samples file .. see samplefile for format details
samplemanifest: "/opt2/.test/samples.test_lintr.tsv"

#####################################################################################
# User parameters
#####################################################################################
# run sample contrasts
run_contrasts: "Y" # Y or N
contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y"
contrasts_fdr_cutoff: "0.05"
contrasts_lfc_cutoff: "0.59" # FC of 1.5

# reference
genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below.

# alignment quality threshold
mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N).

# normalization method
## spikein: normalization will be performed based off of spike-in aligned read count;
## library: library normalization will be performed
## none: no norm will be performed
norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"]
## if norm_method ="spikein"
spikein_genome: "ecoli" # must be species found in spikein_reference below
spikein_scale: 1000000

# user parameters for alignment
bowtie2_parameters: "--dovetail --phred33 --very-sensitive"
fragment_len_filter: "1000"

# duplication status
## users can select deduplicated peaks (dedup) or non-deduplicated peaks (no_dedup)
### dupstatus: "dedup" # means run deduplicated analysis only
### dupstatus: "no_dedup" # means run non-deduplicated analysis only
## complete list:
### dupstatus: "dedup, no_dedup"
dupstatus: "dedup"

# which peaktypes to consider for differential analysis:
# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed |
# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA |
## macs2 options: macs2_narrow, macs2_broad
### NOTE: DESeq step generally fails for broadPeak; generally has too many calls.

# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed |
# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN |
## seacr options: seacr_stringent, seacr_relaxed

# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed |
# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA |
## gopeaks options: gopeaks_narrow, gopeaks_broad

# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed |
# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA |
# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN |
# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA |
## complete list:
### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad"
peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad"

## macs2 additional option
### macs2 can be run with or without the control. adding a control will increase peak specificity
### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest
macs2_control: "N"

# qvalues
## thresholds to be used for peak callers
## must be a list of comma-separated values; at least one numeric value is required.
### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html
### default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md
### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md
quality_thresholds: "0.1, 0.05"

## MACS2, broad-peaks specific, quality threshold
### if broadPeak is selected as a 'peaktype', an additional quality threshold can be used
macs2_broad_peak_threshold: "0.01"

# annotations
## rose parameters
stitch_distance: 12500
tss_distance: 2500

## homer
motif_size: 1000
preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir"

## GO Enrichment
## enrichment analysis can be performed on hg19 or hg38 samples
## one option may be chosen for each project
geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"]

#####################################################################################
# References
# NOTE: "gtf" is only required if TxDb is not available for the species in
# Bioconductor, e.g. hs1
#####################################################################################
# references:
reference:
hg38:
fa: "/opt2/output_carlisle/annotation/hg38.fa"
gtf: "/opt2/output_carlisle/annotation/genes.gtf"
blacklist: "/opt2/output_carlisle/annotation/hg38.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"
macs2_g: "hs"
tss_bed: "/opt2/output_carlisle/annotation/hg38.tss.bed"
rose: "/opt2/output_carlisle/annotation/hg38_refseq.ucsc"
hg19:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf"
blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"
macs2_g: "hs"
tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed"
rose: "/opt2/output_carlisle/annotation/hg19_refseq.ucsc"
mm10:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf"
blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY"
macs2_g: "mm"
hs1:
fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa"
gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf"
blacklist: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/T2T.excluderanges.bed"
tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed"
regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY"
macs2_g: "3.1e+8"
rose: "/opt2/output_carlisle/annotation/hs1_refseq.ucsc"
# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html
# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ to get 3.1e+8 value above

spikein_reference:
ecoli:
fa: "/opt2/output_carlisle/annotation/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna"
drosophila:
fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa"
saccharomyces:
fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna"

adapters: "/opt2/output_carlisle/annotation/adapters.fa"
10 changes: 10 additions & 0 deletions .test/samples.test_lintr.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
sampleName replicateNumber isControl controlName controlReplicateNumber path_to_R1 path_to_R2
53_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_1.R1.fastq.gz /opt2/.test/53_H3K4me3_1.R2.fastq.gz
53_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_2.R1.fastq.gz /opt2/.test/53_H3K4me3_2.R2.fastq.gz
HN6_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H3K4me3_1.R1.fastq.gz /opt2/.test/HN6_H3K4me3_1.R2.fastq.gz
HN6_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H3K4me3_2.R1.fastq.gz /opt2/.test/HN6_H3K4me3_2.R2.fastq.gz
53_H4K20m3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H4K20m3_1.R1.fastq.gz /opt2/.test/53_H4K20m3_1.R2.fastq.gz
53_H4K20m3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H4K20m3_2.R1.fastq.gz /opt2/.test/53_H4K20m3_2.R2.fastq.gz
HN6_H4K20me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H4K20me3_1.R1.fastq.gz /opt2/.test/HN6_H4K20me3_1.R2.fastq.gz
HN6_H4K20me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H4K20me3_2.R1.fastq.gz /opt2/.test/HN6_H4K20me3_2.R2.fastq.gz
HN6_IgG_rabbit_negative_control 1 Y - - /opt2/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz /opt2/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz
32 changes: 22 additions & 10 deletions carlisle
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,34 @@
#
# DISCLAIMER: This wrapper only works on BIOWULF

# Version strings for the python, snakemake and singularity tools.
# NOTE(review): presumably environment-module names (see `module purge` below) -
# confirm where they are loaded.
# NOTE(review): each variable is assigned twice in this view (likely the removed
# and added sides of a diff); if both are present, the later assignment wins.
PYTHON_VERSION="python/3.7"
SNAKEMAKE_VERSION="snakemake"
SINGULARITY_VERSION="singularity"
PYTHON_VERSION="python/3.9"
SNAKEMAKE_VERSION="snakemake/7.19.1"
SINGULARITY_VERSION="singularity/3.10.5"

# Abort on the first failing command, and treat a pipeline as failed if any
# stage in it fails.
set -eo pipefail
# Start from a clean module environment.
module purge

SCRIPTNAME="$0"
# NOTE(review): `basename $0` drops the directory part before `readlink -f`
# resolves it, so the result is relative to the current working directory, not
# to the script's own location - confirm this is intended.
SCRIPTBASENAME=$(readlink -f $(basename $0))

# Define the cluster-dependent settings for the current host.
# Sets PARTITIONS (SLURM partition list), cluster_specific_yaml and
# tools_specific_yaml (names of the host-specific resource files), and, on
# biowulf.nih.gov only, BUYINPARTITIONS.
hostID="$HOSTNAME"
if [[ $hostID == "biowulf.nih.gov" ]]; then
    # NOTE(review): this downloads and runs a script from GitHub every time the
    # wrapper starts; in this view its output is only referenced by the
    # commented-out line below.
    BUYINPARTITIONS=$(bash <(curl -s https://raw.githubusercontent.com/CCBR/Tools/master/Biowulf/get_buyin_partition_list.bash 2>/dev/null))
    PARTITIONS="norm,ccr"
    cluster_specific_yaml="cluster_biowulf.yaml"
    tools_specific_yaml="tools_biowulf.yaml"
    #if [ $BUYINPARTITIONS ];then PARTITIONS="norm,$BUYINPARTITIONS";fi
elif [[ $hostID == "biowulf8.nih.gov" ]]; then
    PARTITIONS="rhel8"
    cluster_specific_yaml="cluster_rhel8.yaml"
    tools_specific_yaml="tools_rhel8.yaml"
else
    # Previously an unrecognized host left all three variables unset without any
    # message. Warn (without exiting) so later failures are traceable.
    echo "WARNING: unrecognized host '${hostID}' (expected biowulf.nih.gov or biowulf8.nih.gov); PARTITIONS, cluster_specific_yaml and tools_specific_yaml are not set" >&2
fi

# essential files
# these are relative to the workflow's base folder
# these are copied into the WORKDIR
# NOTE(review): ESSENTIAL_FILES is assigned twice in this view (likely the
# removed and added sides of a diff); if both are present, the later one wins.
# NOTE(review): in the second assignment the `cluster_*` / `tools_*` patterns
# are stored literally inside the quoted string; they are only expanded where
# the variable is later used unquoted - confirm the consuming loop relies on that.
ESSENTIAL_FILES="config/config.yaml config/samples.tsv config/contrasts.tsv config/fqscreen_config.conf config/multiqc_config.yaml resources/cluster.yaml resources/tools.yaml"
ESSENTIAL_FILES="config/config.yaml config/samples.tsv config/contrasts.tsv config/fqscreen_config.conf config/multiqc_config.yaml resources/cluster_* resources/tools_*"
# folder(s), also relative to the workflow's base folder
ESSENTIAL_FOLDERS="workflow/scripts"
# set extra singularity bindings
EXTRA_SINGULARITY_BINDS="-B /data/CCBR_Pipeliner/,/lscratch"
Expand Down Expand Up @@ -97,6 +111,10 @@ function init() {
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/$f > $WORKDIR/config/$fbn
done

# rename config dependent on partition used
cp $WORKDIR/config/$cluster_specific_yaml $WORKDIR/config/cluster.yaml
cp $WORKDIR/config/$tools_specific_yaml $WORKDIR/config/tools.yaml

# copy essential folders
for f in $ESSENTIAL_FOLDERS;do
rsync -avz --no-perms --no-owner --no-group --progress $PIPELINE_HOME/$f $WORKDIR/
Expand Down Expand Up @@ -302,12 +320,6 @@ function run() {
elif [ "$1" == "slurm" ];then

preruncleanup
# if QOS is other than "global" and is supplied in the cluster.yaml file then add " --qos={cluster.qos}" to the
# snakemake command below
#define partitions
BUYINPARTITIONS=$(bash <(curl -s https://raw.githubusercontent.com/CCBR/Tools/master/Biowulf/get_buyin_partition_list.bash 2>/dev/null))
PARTITIONS="norm,ccr"
#if [ $BUYINPARTITIONS ];then PARTITIONS="norm,$BUYINPARTITIONS";fi

cat > ${WORKDIR}/submit_script.sbatch << EOF
#!/bin/bash
Expand Down
Loading

0 comments on commit 59d65e7

Please sign in to comment.