diff --git a/.github/workflows/build_mkdocs.yaml b/.github/workflows/build_mkdocs.yaml index b437fa1..c01e496 100644 --- a/.github/workflows/build_mkdocs.yaml +++ b/.github/workflows/build_mkdocs.yaml @@ -1,19 +1,17 @@ name: mkdocs_build on: + workflow_dispatch: push: - branches: - - master + paths: + - 'docs/**' jobs: - build: - name: Deploy docs + deploy: runs-on: ubuntu-latest steps: - - name: Checkout main - uses: actions/checkout@v2 - - name: Deploy docs - uses: mhausenblas/mkdocs-deploy-gh-pages@master - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - CONFIG_FILE: mkdocs.yml - EXTRA_PACKAGES: build-base - REQUIREMENTS: docs/requirements.txt + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.9 + - run: pip install --upgrade pip + - run: pip install -r docs/requirements.txt + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/.github/workflows/lintr.yaml b/.github/workflows/lintr.yaml new file mode 100644 index 0000000..ab839dc --- /dev/null +++ b/.github/workflows/lintr.yaml @@ -0,0 +1,25 @@ +name: lintr +on: + push: + branches: + - master + - dev +jobs: + Lintr: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: docker://snakemake/snakemake:v7.19.1 + - name: Lint Workflow + continue-on-error: true + run: | + docker run -v $PWD:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \ + "mkdir -p /opt2/output_carlisle/config /opt2/output_carlisle/annotation && \ + cp -r /opt2/workflow/scripts/ /opt2/output_carlisle/ && \ + cp /opt2/resources/cluster_biowulf.yaml /opt2/output_carlisle/config/cluster.yaml && \ + cp /opt2/resources/tools_biowulf.yaml /opt2/output_carlisle/config/tools.yaml && \ + cd /opt2/output_carlisle/annotation && \ + touch hg38.fa genes.gtf hg38.bed hg38.tss.bed hg38_refseq.ucsc Ecoli_GCF_000005845.2_ASM584v2_genomic.fna adapters.fa && \ + snakemake --lint -s /opt2/workflow/Snakefile \ + -d /opt2/output_carlisle --configfile /opt2/.test/config_lint.yaml || \ + echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'" \ No newline at end of file diff --git a/.github/workflows/test_dev.yml b/.github/workflows/test_dev.yml new file mode 100644 index 0000000..ad132e4 --- /dev/null +++ b/.github/workflows/test_dev.yml @@ -0,0 +1,16 @@ +name: DevTesting +on: + push: + branches: + - dev +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Dev Testing Workflow + uses: snakemake/snakemake-github-action@v1 + with: + directory: '.test' + snakefile: 'workflow/Snakefile' + args: '--cores 1 --use-conda --conda-cleanup-pkgs cache' + stagein: '' # additional preliminary commands to run (can be multiline) \ No newline at end of file diff --git a/.test/config_lint.yaml b/.test/config_lint.yaml new file mode 100644 index 0000000..bdaa388 --- /dev/null +++ b/.test/config_lint.yaml @@ -0,0 +1,147 @@ +##################################################################################### +# Folders / Paths +##################################################################################### +# The working dir... output will be in the results subfolder of the /opt2/output_carlisle +workdir: "/opt2/output_carlisle" + +# tab delimited samples file .. see samplefile for format details +samplemanifest: "/opt2/.test/samples.test_lintr.tsv" + +##################################################################################### +# User parameters +##################################################################################### +# run sample contrasts +run_contrasts: "Y" # Y or N +contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" +contrasts_fdr_cutoff: "0.05" +contrasts_lfc_cutoff: "0.59" # FC of 1.5 + +# reference +genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. + +# alignment quality threshold +mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). + +# normalization method +## spikein: normalization will be performed based off of spike-in aligned read count; +## library: library normalization will be performed +## none: no norm will be performed +norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] +## if norm_method ="spikein" +spikein_genome: "ecoli" # must be species found in spikein_reference below +spikein_scale: 1000000 + +# user parameters for alignment +bowtie2_parameters: "--dovetail --phred33 --very-sensitive" +fragment_len_filter: "1000" + +# duplication status +## users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) +### dupstatus: "dedup" # means run deduplicated analysis only +### dupstatus: "no_dedup" # means run non-deduplicated analysis only +## complete list: +### dupstatus: "dedup, no_dedup" +dupstatus: "dedup" + +# which peaktypes to consider for differential analysis: +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## macs2 options: macs2_narrow, macs2_broad +### NOTE: DESeq step generally fails for broadPeak; generally has too many calls. + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +## seacr options: seacr_stringent, seacr_relaxed + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## gopeaks options: gopeaks_narrow, gopeaks_broad + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## complete list: +### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" +peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" + +## macs2 additional option +### macs2 can be run with or without the control. adding a control will increase peak specificity +### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest +macs2_control: "N" + +# qvalues +## thresholds to be used for peak callers +## must be a list of comma separated values. minimum of numeric value required. +### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html +### default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md +### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md +quality_thresholds: "0.1, 0.05" + +## MACS2, broad-peaks specific, quality threshold +### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used +macs2_broad_peak_threshold: "0.01" + +# annotations +## rose parameters +stitch_distance: 12500 +tss_distance: 2500 + +## homer +motif_size: 1000 +preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir" + +## GO Enrichment +## enrichment analysis can be performed on hg19 or hg38 samples +## one option may be chosen for each project +geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] + +##################################################################################### +# References +# NOTE: "gtf" is only required if TxDb is not avaiable for the species in +# Bioconductor eg. hs1 +##################################################################################### +# references: +reference: + hg38: + fa: "/opt2/output_carlisle/annotation/hg38.fa" + gtf: "/opt2/output_carlisle/annotation/genes.gtf" + blacklist: "/opt2/output_carlisle/annotation/hg38.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "/opt2/output_carlisle/annotation/hg38.tss.bed" + rose: "/opt2/output_carlisle/annotation/hg38_refseq.ucsc" + hg19: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed" + rose: "/opt2/output_carlisle/annotation/hg19_refseq.ucsc" + mm10: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY" + macs2_g: "mm" + hs1: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" + blacklist: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/T2T.excluderanges.bed" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "3.1e+8" + rose: "/opt2/output_carlisle/annotation/hs1_refseq.ucsc" +# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html +# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ to get 3.1e+8 value above + +spikein_reference: + ecoli: + fa: "/opt2/output_carlisle/annotation/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna" + drosophila: + fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + saccharomyces: + fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna" + +adapters: "/opt2/output_carlisle/annotation/adapters.fa" diff --git a/.test/samples.test_lintr.tsv b/.test/samples.test_lintr.tsv new file mode 100644 index 0000000..82391e9 --- /dev/null +++ b/.test/samples.test_lintr.tsv @@ -0,0 +1,10 @@ +sampleName replicateNumber isControl controlName controlReplicateNumber path_to_R1 path_to_R2 +53_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_1.R1.fastq.gz /opt2/.test/53_H3K4me3_1.R2.fastq.gz +53_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_2.R1.fastq.gz /opt2/.test/53_H3K4me3_2.R2.fastq.gz +HN6_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H3K4me3_1.R1.fastq.gz /opt2/.test/HN6_H3K4me3_1.R2.fastq.gz +HN6_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H3K4me3_2.R1.fastq.gz /opt2/.test/HN6_H3K4me3_2.R2.fastq.gz +53_H4K20m3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H4K20m3_1.R1.fastq.gz /opt2/.test/53_H4K20m3_1.R2.fastq.gz +53_H4K20m3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H4K20m3_2.R1.fastq.gz /opt2/.test/53_H4K20m3_2.R2.fastq.gz +HN6_H4K20me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H4K20me3_1.R1.fastq.gz /opt2/.test/HN6_H4K20me3_1.R2.fastq.gz +HN6_H4K20me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H4K20me3_2.R1.fastq.gz /opt2/.test/HN6_H4K20me3_2.R2.fastq.gz +HN6_IgG_rabbit_negative_control 1 Y - - /opt2/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz /opt2/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz \ No newline at end of file diff --git a/carlisle b/carlisle index e3de95f..c2e1ac0 100755 --- a/carlisle +++ b/carlisle @@ -9,9 +9,9 @@ # # DISCLAIMER: This wrapper only works on BIOWULF -PYTHON_VERSION="python/3.7" -SNAKEMAKE_VERSION="snakemake" -SINGULARITY_VERSION="singularity" +PYTHON_VERSION="python/3.9" +SNAKEMAKE_VERSION="snakemake/7.19.1" +SINGULARITY_VERSION="singularity/3.10.5" set -eo pipefail module purge @@ -19,10 +19,24 @@ module purge SCRIPTNAME="$0" SCRIPTBASENAME=$(readlink -f $(basename $0)) +#define cluster, partitions dependent on host +hostID=`echo $HOSTNAME` +if [[ $hostID == "biowulf.nih.gov" ]]; then + BUYINPARTITIONS=$(bash <(curl -s https://raw.githubusercontent.com/CCBR/Tools/master/Biowulf/get_buyin_partition_list.bash 2>/dev/null)) + PARTITIONS="norm,ccr" + cluster_specific_yaml="cluster_biowulf.yaml" + tools_specific_yaml="tools_biowulf.yaml" + #if [ $BUYINPARTITIONS ];then PARTITIONS="norm,$BUYINPARTITIONS";fi +elif [[ $hostID == "biowulf8.nih.gov" ]]; then + PARTITIONS="rhel8" + cluster_specific_yaml="cluster_rhel8.yaml" + tools_specific_yaml="tools_rhel8.yaml" +fi + # essential files # these are relative to the workflows' base folder # these are copied into the WORKDIR -ESSENTIAL_FILES="config/config.yaml config/samples.tsv config/contrasts.tsv config/fqscreen_config.conf config/multiqc_config.yaml resources/cluster.yaml resources/tools.yaml" +ESSENTIAL_FILES="config/config.yaml config/samples.tsv config/contrasts.tsv config/fqscreen_config.conf config/multiqc_config.yaml resources/cluster_* resources/tools_*" ESSENTIAL_FOLDERS="workflow/scripts" # set extra singularity bindings EXTRA_SINGULARITY_BINDS="-B /data/CCBR_Pipeliner/,/lscratch" @@ -97,6 +111,10 @@ function init() { sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/$f > $WORKDIR/config/$fbn done + # rename config dependent on partition used + cp $WORKDIR/config/$cluster_specific_yaml $WORKDIR/config/cluster.yaml + cp $WORKDIR/config/$tools_specific_yaml $WORKDIR/config/tools.yaml + # copy essential folders for f in $ESSENTIAL_FOLDERS;do rsync -avz --no-perms --no-owner --no-group --progress $PIPELINE_HOME/$f $WORKDIR/ @@ -302,12 +320,6 @@ function run() { elif [ "$1" == "slurm" ];then preruncleanup - # if QOS is other than "global" and is supplied in the cluster.yaml file then add " --qos={cluster.qos}" to the - # snakemake command below - #define partitions - BUYINPARTITIONS=$(bash <(curl -s https://raw.githubusercontent.com/CCBR/Tools/master/Biowulf/get_buyin_partition_list.bash 2>/dev/null)) - PARTITIONS="norm,ccr" - #if [ $BUYINPARTITIONS ];then PARTITIONS="norm,$BUYINPARTITIONS";fi cat > ${WORKDIR}/submit_script.sbatch << EOF #!/bin/bash diff --git a/config/config.yaml b/config/config.yaml index d387e10..70e996a 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,147 +1,147 @@ -##################################################################################### -# Folders / Paths -##################################################################################### -# The working dir... output will be in the results subfolder of the workdir -workdir: "WORKDIR" - -# tab delimited samples file .. see samplefile for format details -samplemanifest: "WORKDIR/config/samples.tsv" - -##################################################################################### -# User parameters -##################################################################################### -# run sample contrasts -run_contrasts: "Y" # Y or N -contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be "Y" -contrasts_fdr_cutoff: "0.05" -contrasts_lfc_cutoff: "0.59" # FC of 1.5 - -# reference -genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. - -# alignment quality threshold -mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). - -# normalization method -## spikein: normalization will be performed based off of spike-in aligned read count; -## library: library normalization will be performed -## none: no norm will be performed -norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] -## if norm_method ="spikein" -spikein_genome: "ecoli" # must be species found in spikein_reference below -spikein_scale: 1000000 - -# user parameters for alignment -bowtie2_parameters: "--dovetail --phred33 --very-sensitive" -fragment_len_filter: "1000" - -# duplication status -## users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) -### dupstatus: "dedup" # means run deduplicated analysis only -### dupstatus: "no_dedup" # means run non-deduplicated analysis only -## complete list: -### dupstatus: "dedup, no_dedup" -dupstatus: "dedup" - -# which peaktypes to consider for differential analysis: -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## macs2 options: macs2_narrow, macs2_broad -### NOTE: DESeq step generally fails for broadPeak; generally has too many calls. - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed | -# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | -## seacr options: seacr_stringent, seacr_relaxed - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## gopeaks options: gopeaks_narrow, gopeaks_broad - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | -# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## complete list: -### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" -peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" - -## macs2 additional option -### macs2 can be run with or without the control. adding a control will increase peak specificity -### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest -macs2_control: "N" - -# qvalues -## thresholds to be used for peak callers -## must be a list of comma separated values. minimum of numeric value required. -### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html -### default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md -### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md -quality_thresholds: "0.1, 0.05" - -## MACS2, broad-peaks specific, quality threshold -### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used -macs2_broad_peak_threshold: "0.01" - -# annotations -## rose parameters -stitch_distance: 12500 -tss_distance: 2500 - -## homer -motif_size: 1000 -preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir" - -## GO Enrichment -## enrichment analysis can be performed on hg19 or hg38 samples -## one option may be chosen for each project -geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] - -##################################################################################### -# References -# NOTE: "gtf" is only required if TxDb is not avaiable for the species in -# Bioconductor eg. hs1 -##################################################################################### -# references: -reference: - hg38: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/hg38.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "hs" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hg38.tss.bed" - rose: "WORKDIR/annotation/hg38_refseq.ucsc" - hg19: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "hs" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed" - rose: "WORKDIR/annotation/hg19_refseq.ucsc" - mm10: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY" - macs2_g: "mm" - hs1: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" - blacklist: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/T2T.excluderanges.bed" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "3.1e+8" - rose: "WORKDIR/annotation/hs1_refseq.ucsc" -# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html -# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ to get 3.1e+8 value above - -spikein_reference: - ecoli: - fa: "PIPELINE_HOME/resources/spikein/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna" - drosophila: - fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - saccharomyces: - fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna" - -adapters: "PIPELINE_HOME/resources/other/adapters.fa" +##################################################################################### +# Folders / Paths +##################################################################################### +# The working dir... output will be in the results subfolder of the workdir +workdir: "WORKDIR" + +# tab delimited samples file .. see samplefile for format details +samplemanifest: "WORKDIR/config/samples.tsv" + +##################################################################################### +# User parameters +##################################################################################### +# run sample contrasts +run_contrasts: "Y" # Y or N +contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be "Y" +contrasts_fdr_cutoff: "0.05" +contrasts_lfc_cutoff: "0.59" # FC of 1.5 + +# reference +genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. + +# alignment quality threshold +mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). + +# normalization method +## spikein: normalization will be performed based off of spike-in aligned read count; +## library: library normalization will be performed +## none: no norm will be performed +norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] +## if norm_method ="spikein" +spikein_genome: "ecoli" # must be species found in spikein_reference below +spikein_scale: 1000000 + +# user parameters for alignment +bowtie2_parameters: "--dovetail --phred33 --very-sensitive" +fragment_len_filter: "1000" + +# duplication status +## users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) +### dupstatus: "dedup" # means run deduplicated analysis only +### dupstatus: "no_dedup" # means run non-deduplicated analysis only +## complete list: +### dupstatus: "dedup, no_dedup" +dupstatus: "dedup" + +# which peaktypes to consider for differential analysis: +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## macs2 options: macs2_narrow, macs2_broad +### NOTE: DESeq step generally fails for broadPeak; generally has too many calls. + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +## seacr options: seacr_stringent, seacr_relaxed + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## gopeaks options: gopeaks_narrow, gopeaks_broad + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## complete list: +### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" +peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" + +## macs2 additional option +### macs2 can be run with or without the control. adding a control will increase peak specificity +### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest +macs2_control: "N" + +# qvalues +## thresholds to be used for peak callers +## must be a list of comma separated values. minimum of numeric value required. +### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html +### default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md +### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md +quality_thresholds: "0.1, 0.05" + +## MACS2, broad-peaks specific, quality threshold +### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used +macs2_broad_peak_threshold: "0.01" + +# annotations +## rose parameters +stitch_distance: 12500 +tss_distance: 2500 + +## homer +motif_size: 1000 +preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir" + +## GO Enrichment +## enrichment analysis can be performed on hg19 or hg38 samples +## one option may be chosen for each project +geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] + +##################################################################################### +# References +# NOTE: "gtf" is only required if TxDb is not avaiable for the species in +# Bioconductor eg. hs1 +##################################################################################### +# references: +reference: + hg38: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hg38.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hg38.tss.bed" + rose: "WORKDIR/annotation/hg38_refseq.ucsc" + hg19: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed" + rose: "WORKDIR/annotation/hg19_refseq.ucsc" + mm10: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY" + macs2_g: "mm" + hs1: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" + blacklist: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/T2T.excluderanges.bed" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "3.1e+8" + rose: "WORKDIR/annotation/hs1_refseq.ucsc" +# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html +# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ to get 3.1e+8 value above + +spikein_reference: + ecoli: + fa: "PIPELINE_HOME/resources/spikein/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna" + drosophila: + fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + saccharomyces: + fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna" + +adapters: "PIPELINE_HOME/resources/other/adapters.fa" diff --git a/docs/requirements.txt b/docs/requirements.txt index dbb4365..e63c63e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,8 @@ mkdocs-git-revision-date-localized-plugin==1.2.0 #https://pypi.org/project/mkdocs-minify-plugin/ mkdocs-minify-plugin==0.6.4 #https://pypi.org/project/mkdocs-git-revision-date-plugin/ -mkdocs-git-revision-date-plugin==0.3.2 \ No newline at end of file +mkdocs-git-revision-date-plugin==0.3.2 +#https://pypi.org/project/mkdocs-material/ +mkdocs-material==9.1.6 +#https://pypi.org/project/mkdocs-material-extensions/ +mkdocs-material-extensions==1.1.1 \ No newline at end of file diff --git a/resources/cluster.yaml b/resources/cluster_biowulf.yaml similarity index 95% rename from resources/cluster.yaml rename to resources/cluster_biowulf.yaml index d145808..eaa4d09 100644 --- a/resources/cluster.yaml +++ b/resources/cluster_biowulf.yaml @@ -1,61 +1,61 @@ -# cluster configuration -################################################################### -__default__: - gres: lscratch:96 - mem: 40g - partition: norm - time: 00-02:00:00 - threads: 2 - name: "{rule}.{wildcards}" - output: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.out" - error: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.err" -################################################################### -align: - mem: 200g - time: 00-12:00:00 - threads: 56 -################################################################### -qc_fastq_screen_validator: - mem: 20g - threads: 4 -################################################################### -create_reference: - mem: 200g - time: 00-12:00:00 - threads: 32 -################################################################### -bam2bg: - mem: 30g - time: 00-1:00:00 - threads: 4 -################################################################### -filter: - mem: 100g - time: 00-12:00:00 -################################################################### -trim: - mem: 200g - time: 00-12:00:00 - threads: 56 -################################################################### -gopeaks_narrow: - mem: 75g - threads: 8 -################################################################### -gopeaks_broad: - mem: 75g - threads: 8 -################################################################### -findMotif: - mem: 10g - threads: 6 -################################################################### -rose: - mem: 32g - threads: 2 - time: 00-06:00:00 -################################################################### -go_enrichment: - time: 01-12:00:00 - mem: 10g +# cluster configuration +################################################################### +__default__: + gres: lscratch:96 + mem: 40g + partition: norm + time: 00-02:00:00 + threads: 2 + name: "{rule}.{wildcards}" + output: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.out" + error: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.err" +################################################################### +align: + mem: 200g + time: 00-12:00:00 + threads: 56 +################################################################### +qc_fastq_screen_validator: + mem: 20g + threads: 4 +################################################################### +create_reference: + mem: 200g + time: 00-12:00:00 + threads: 32 +################################################################### +bam2bg: + mem: 30g + time: 00-1:00:00 + threads: 4 +################################################################### +filter: + mem: 100g + time: 00-12:00:00 +################################################################### +trim: + mem: 200g + time: 00-12:00:00 + threads: 56 +################################################################### +gopeaks_narrow: + mem: 75g + threads: 8 +################################################################### +gopeaks_broad: + mem: 75g + threads: 8 +################################################################### +findMotif: + mem: 10g + threads: 6 +################################################################### +rose: + mem: 32g + threads: 2 + time: 00-06:00:00 +################################################################### +go_enrichment: + time: 02-00:00:00 + mem: 10g ################################################################### \ No newline at end of file diff --git a/resources/cluster_rhel8.yaml b/resources/cluster_rhel8.yaml new file mode 100644 index 0000000..ea66fbf --- /dev/null +++ b/resources/cluster_rhel8.yaml @@ -0,0 +1,61 @@ +# cluster configuration +################################################################### +__default__: + gres: lscratch:96 + mem: 40g + partition: rhel8 + time: 00-02:00:00 + threads: 2 + name: "{rule}.{wildcards}" + output: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.out" + error: "logs/{rule}.${{SLURM_JOBID}}.%j.{wildcards}.err" +################################################################### +align: + mem: 200g + time: 00-12:00:00 + threads: 56 +################################################################### +qc_fastq_screen_validator: + mem: 20g + threads: 4 +################################################################### +create_reference: + mem: 200g + time: 00-12:00:00 + threads: 32 +################################################################### +bam2bg: + mem: 30g + time: 00-1:00:00 + threads: 4 +################################################################### +filter: + mem: 100g + time: 00-12:00:00 +################################################################### +trim: + mem: 200g + time: 00-12:00:00 + threads: 56 +################################################################### +gopeaks_narrow: + mem: 75g + threads: 8 +################################################################### +gopeaks_broad: + mem: 75g + threads: 8 +################################################################### +findMotif: + mem: 10g + threads: 6 +################################################################### +rose: + mem: 32g + threads: 2 + time: 00-06:00:00 +################################################################### +go_enrichment: + time: 02-00:00:00 + mem: 10g +################################################################### \ No newline at end of file diff --git a/resources/tools.yaml b/resources/tools_biowulf.yaml similarity index 95% rename from resources/tools.yaml rename to resources/tools_biowulf.yaml index 48c3975..adfb54d 100644 --- a/resources/tools.yaml +++ b/resources/tools_biowulf.yaml @@ -10,7 +10,7 @@ macs2: "macs/2.2.7.1" multiqc: "multiqc/1.14" perl: "perl/5.34" picard: "picard/2.26.9" -python37: "python/3.7" +python3: "python/3.7" R: "R/4.2.2" rose: "ROSE/1.3.1" samtools: "samtools/1.15" diff --git a/resources/tools_rhel8.yaml b/resources/tools_rhel8.yaml new file mode 100644 index 0000000..2f7e023 --- /dev/null +++ b/resources/tools_rhel8.yaml @@ -0,0 +1,20 @@ +bedtools: "bedtools/2.30.0" +bedops: "bedops/2.4.41" +bowtie2: "bowtie/2-2.4.5" +cutadapt: "cutadapt/4.0" +fastqc: "fastqc/0.11.9" +fastq_screen: "fastq_screen/0.15.2" +fastxtoolkit: "fastxtoolkit/0.0.14" +homer: "homer/4.11.1" +macs2: "macs/2.2.7.1" +multiqc: "multiqc/1.14" +perl: "perl/5.34" +picard: "picard/2.27.3" +python3: "python/3.9" +R: "R/4.2.2" +rose: "ROSE/1.3.1" +samtools: "samtools/1.15" +seacr: "SEACR/1.4-beta.2" +ucsc: "ucsc/445" +gopeaks: "/data/CCBR_Pipeliner/Pipelines/gopeaks/gopeaks" +fastq_val: "/data/CCBR_Pipeliner/iCLIP/bin/fastQValidator" \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 7090282..02370e9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -164,34 +164,30 @@ rule all: ########################################## ### required files ########################################## - ## Rules/Init # manifests unpack(run_pipe_prep), - # Rules/Align - # norm table, if needed + # ALIGN / create_library_norm_scales unpack(run_library_norm), - # alignment stats yaml files and stats table + # ALIGN / alignstats expand(join(RESULTSDIR,"alignment_stats","{replicate}.alignment_stats.yaml"),replicate=REPLICATES), + + # ALIGN / gather_alignstats join(RESULTSDIR,"alignment_stats","alignment_stats.tsv"), - ## Rules/peakcalls - # PEAKCALLS rules + # PEAKCALLS / macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad unpack(run_macs2), unpack(run_seacr), unpack(run_gopeaks), - ## Rules/QC - # qc + # QC unpack(run_qc), - ## Rules/diff - # DIFFERENTIAL + # DIFF / create_contrast_data_files unpack(run_contrasts), - ## Rules/annotation - # ANNOTATION + # ANNOTATION / findMotif, rose, create_contrast_peakcaller_files, go_enrichment unpack(get_motifs), unpack(get_rose), unpack(get_enrichment) @@ -199,18 +195,38 @@ rule all: ########################################## ### intermediate files ########################################## - # bowtie2 index building - # join(BOWTIE2_INDEX,"ref.1.bt2"), - # join(BOWTIE2_INDEX,"ref.len"), - # join(BOWTIE2_INDEX,"spikein.len"), - - ## ALIGN RULES - # filtered bams - # expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam"),replicate=REPLICATES,dupstatus=DUPSTATUS), - # expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam.idxstats"),replicate=REPLICATES,dupstatus=DUPSTATUS), - - # # bedgraphs and bigwigs - # expand(join(RESULTSDIR,"fragments","{replicate}.{dupstatus}.fragments.bed"),replicate=REPLICATES,dupstatus=DUPSTATUS), - # expand(join(RESULTSDIR,"bedgraph","{replicate}.{dupstatus}.bedgraph"),replicate=REPLICATES,dupstatus=DUPSTATUS), - # expand(join(RESULTSDIR,"bigwig","{replicate}.{dupstatus}.bigwig"),replicate=REPLICATES,dupstatus=DUPSTATUS), + # INIT / create_reference + join(BOWTIE2_INDEX,"ref.1.bt2"), + join(BOWTIE2_INDEX,"ref.len"), + join(BOWTIE2_INDEX,"spikein.len"), + + # ALIGN / trim + expand(join(RESULTSDIR,"trim","{replicate}.R1.trim.fastq.gz"),replicate=REPLICATES), + expand(join(RESULTSDIR,"trim","{replicate}.R2.trim.fastq.gz"),replicate=REPLICATES), + + # ALIGN / align + expand(join(RESULTSDIR,"bam","raw","{replicate}.bam"),replicate=REPLICATES), + expand(join(RESULTSDIR,"bam","raw","{replicate}.bam.bai"),replicate=REPLICATES), + expand(join(RESULTSDIR,"bam","raw","{replicate}.bam.flagstat"),replicate=REPLICATES), + expand(join(RESULTSDIR,"bam","raw","{replicate}.bam.idxstats"),replicate=REPLICATES), + + # ALIGN / filter + expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam"),replicate=REPLICATES,dupstatus=DUPSTATUS), + expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam.idxstats"),replicate=REPLICATES,dupstatus=DUPSTATUS), + expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam.flagstat"),replicate=REPLICATES,dupstatus=DUPSTATUS), + expand(join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam.idxstats"),replicate=REPLICATES,dupstatus=DUPSTATUS), + + # ALIGN / alignstats + join(RESULTSDIR,"alignment_stats","{replicate}.alignment_stats.yaml"), + + # ALIGN / gather_alignstats + join(RESULTSDIR,"alignment_stats","alignment_stats.tsv"), + + # ALIGN / create_library_norm_scales + join(RESULTSDIR,"alignment_stats","library_scale.tsv"), + + # ALIGN / bam2bg + expand(join(RESULTSDIR,"fragments","{replicate}.{dupstatus}.fragments.bed"),replicate=REPLICATES,dupstatus=DUPSTATUS), + expand(join(RESULTSDIR,"bedgraph","{replicate}.{dupstatus}.bedgraph"),replicate=REPLICATES,dupstatus=DUPSTATUS), + expand(join(RESULTSDIR,"bigwig","{replicate}.{dupstatus}.bigwig"),replicate=REPLICATES,dupstatus=DUPSTATUS), """ \ No newline at end of file diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index e167479..6017f27 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -135,7 +135,7 @@ rule filter: envmodules: TOOLS["bowtie2"], TOOLS["samtools"], - TOOLS["python37"], + TOOLS["python3"], TOOLS["picard"], TOOLS["ucsc"] shell: @@ -221,7 +221,7 @@ rule alignstats: pyscript = join(SCRIPTSDIR,"_get_nreads_stats.py"), threads: getthreads("alignstats") envmodules: - TOOLS["python37"], + TOOLS["python3"], shell: """ set -exo pipefail diff --git a/workflow/rules/annotations.smk b/workflow/rules/annotations.smk index e59c47d..dcc4be2 100644 --- a/workflow/rules/annotations.smk +++ b/workflow/rules/annotations.smk @@ -99,8 +99,9 @@ rule rose: envmodules: TOOLS["bedtools"], TOOLS["rose"], - TOOLS["python37"], + TOOLS["python3"], TOOLS["samtools"], + TOOLS["R"] threads: getthreads("rose") params: genome = config["genome"], @@ -299,8 +300,6 @@ if config["run_contrasts"] == "Y": """ input: contrast_file=rules.create_contrast_peakcaller_files.output.peak_contrast_files - output: - html=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"), params: rscript_wrapper=join(SCRIPTSDIR,"_go_enrichment_wrapper.R"), rmd=join(SCRIPTSDIR,"_go_enrichment.Rmd"), @@ -310,7 +309,9 @@ if config["run_contrasts"] == "Y": geneset_id = GENESET_ID, dedup_status = "{dupstatus}" envmodules: - TOOLS["R"] + TOOLS["R"], + output: + html=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"), shell: """ set -exo pipefail diff --git a/workflow/rules/diff.smk b/workflow/rules/diff.smk index f259d51..e9b6139 100644 --- a/workflow/rules/diff.smk +++ b/workflow/rules/diff.smk @@ -124,7 +124,7 @@ rule make_counts_matrix: params: pyscript=join(SCRIPTSDIR,"_make_counts_matrix.py"), envmodules: - TOOLS["python37"], + TOOLS["python3"], TOOLS["bedtools"], TOOLS["bedops"] shell: @@ -275,7 +275,7 @@ rule diffbb: lfc=LFCCUTOFF, envmodules: TOOLS["ucsc"], - TOOLS["python37"] + TOOLS["python3"] shell: """ set -exo pipefail