Skip to content

Commit

Permalink
clean code #39
Browse files Browse the repository at this point in the history
  • Loading branch information
sreichl committed Apr 25, 2024
1 parent 5964a86 commit bdea270
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 80 deletions.
6 changes: 0 additions & 6 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,6 @@ import shutil
from snakemake.utils import validate, min_version
from string import Template

# import argparse
# import json
# import csv
# import sys
# import subprocess

module_name = "atacseq_pipeline"

##### set minimum snakemake version #####
Expand Down
4 changes: 2 additions & 2 deletions workflow/envs/multiqc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ dependencies:
- pytables=3.9.1
- pip=23.3.1
- pip:
# - 'git+https://github.com/epigen/atacseq_pipeline/#egg=atacseq_report&subdirectory=workflow/scripts/multiqc_atacseq' # works
- -e /home/sreichl/projects/atacseq_pipeline/workflow/scripts/multiqc_atacseq # works but only local
- 'git+https://github.com/epigen/atacseq_pipeline/#egg=atacseq_report&subdirectory=workflow/scripts/multiqc_atacseq' # works
# - -e /home/sreichl/projects/atacseq_pipeline/workflow/scripts/multiqc_atacseq # works but only local
2 changes: 0 additions & 2 deletions workflow/rules/processing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ rule tss_coverage:
bam = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam"),
bai = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam.bai"),
output:
# bigWig = os.path.join(result_path, "hub","{sample}.bigWig"),
# bigWig_log = os.path.join(result_path, "hub","{sample}.bigWig.log"),
tss_hist = os.path.join(result_path,"results","{sample}","{sample}.tss_histogram.csv"),
params:
# parameters for coverage
Expand Down
1 change: 1 addition & 0 deletions workflow/rules/quantification.smk
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ rule map_consensus_tss:
consensus_counts = os.path.join(result_path,"counts","consensus_counts.csv"),
output:
tss_counts = os.path.join(result_path,"counts","TSS_counts.csv"),
tss_annot = os.path.join(result_path,"counts","TSS_annotation.csv"),
params:
# cluster parameters
partition=config.get("partition"),
Expand Down
68 changes: 0 additions & 68 deletions workflow/rules/report.smk
Original file line number Diff line number Diff line change
Expand Up @@ -37,79 +37,11 @@ rule symlink_stats:
ln -sfn $(realpath --relative-to=$(dirname {output.macs2_log}) {input.macs2_log}) {output.macs2_log}
ln -sfn $(realpath --relative-to=$(dirname {output.peaks_xls}) {input.peaks_xls}) {output.peaks_xls}
"""

# rule ucsc_hub:
# input:
# bigwig_files = expand(os.path.join(result_path, "hub", "{sample}.bigWig"), sample=samples.keys()),
# output:
# bigwig_symlinks = expand(os.path.join(result_path, "hub", config["genome"], "{sample}.bigWig"), sample=samples.keys()),
# genomes_file = os.path.join(result_path, "hub", "genomes.txt"),
# hub_file = os.path.join(result_path, "hub", "hub.txt"),
# trackdb_file = os.path.join(result_path, "hub", config["genome"], "trackDb.txt"),
# params:
# # cluster parameters
# partition=config.get("partition"),
# resources:
# mem_mb=config.get("mem", "1000"),
# threads: config.get("threads", 1)
# log:
# "logs/rules/ucsc_hub.log"
# run:
# # create bigwig symlinks
# for i in range(len(input.bigwig_files)):
# os.symlink(os.path.join('../',os.path.basename(input.bigwig_files[i])), output.bigwig_symlinks[i])

# # create genomes.txt
# with open(output.genomes_file, 'w') as gf:
# genomes_text = f'genome {config["genome"]}\ntrackDb {config["genome"]}/trackDb.txt\n'
# gf.write(genomes_text)

# # create hub file
# with open(output.hub_file, 'w') as hf:
# hub_text = [f'hub {config["project_name"]}',
# f'shortLabel {config["project_name"]}',
# f'longLabel {config["project_name"]}',
# 'genomesFile genomes.txt',
# f'email {config["email"]}\n',]
# hf.write('\n'.join(hub_text))

# # create trackdb file
# with open(output.trackdb_file, 'w') as tf:
# colors = ['166,206,227', '31,120,180', '51,160,44', '251,154,153', '227,26,28',
# '253,191,111', '255,127,0', '202,178,214', '106,61,154', '177,89,40']

# track_db = ['track {}'.format(config["project_name"]),
# 'type bigWig', 'compositeTrack on', 'autoScale on', 'maxHeightPixels 32:32:8',
# 'shortLabel {}'.format(config["project_name"][:8]),
# 'longLabel {}'.format(config["project_name"]),
# 'visibility full',
# '', '']
# for sample_name in samples.keys():
# track_color = '255,40,0'

# if config["annot_columns"][0]!="":
# color_hash = hash(samples[sample_name][config["annot_columns"][0]])
# track_color = colors[color_hash % len(colors)]

# track = ['track {}'.format(sample_name),
# 'shortLabel {}'.format(sample_name),
# 'longLabel {}'.format(sample_name),
# 'bigDataUrl {}.bigWig'.format(sample_name),
# 'parent {} on'.format(config["project_name"]),
# 'type bigWig', 'windowingFunction mean',
# 'color {}'.format(track_color),
# '', '']

# track_db += track

# tf.write('\n'.join(track_db))

rule multiqc:
input:
expand(os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam"), sample=samples.keys()),
expand(os.path.join(result_path,"results","{sample}","peaks","{sample}_peaks.narrowPeak"), sample=samples.keys()),
# expand(os.path.join(result_path, "hub","{sample}.bigWig"),sample=samples.keys()),
# trackdb_file = os.path.join(result_path, "hub", config["genome"], "trackDb.txt"), # representing UCSC hub
expand(os.path.join(result_path, 'report', '{sample}_peaks.xls'), sample=samples.keys()), # representing symlinked stats
sample_annotation = config["annotation"],
output:
Expand Down
7 changes: 7 additions & 0 deletions workflow/scripts/map_consensus_tss.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def map_region(x):

# output
tss_counts_path = snakemake.output["tss_counts"]
tss_annot_path = snakemake.output["tss_annot"]

# parameters
TSS_up = -snakemake.config["proximal_size_up"]
Expand All @@ -36,3 +37,9 @@ def map_region(x):
TSS_counts = consensus_counts.loc[TSS_regions["peak_id"],:]
TSS_counts.index = TSS_regions.index
TSS_counts.to_csv(tss_counts_path)

# Build the TSS-level annotation table:
#   1. select the annotation rows of the consensus regions that were
#      successfully mapped (looked up via TSS_regions["peak_id"]),
#   2. move the former index into a regular column,
#   3. re-label the rows with the index of TSS_regions
#      (presumably gene identifiers -- confirm against how TSS_regions is built),
#   4. write the result to the tss_annot output path.
TSS_annot = annot_regions.loc[TSS_regions["peak_id"], :].reset_index()
TSS_annot.index = TSS_regions.index
TSS_annot.to_csv(tss_annot_path)
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def __init__(self):
return None

# Initialise the parent object
super(MultiqcModule, self).__init__(name='ATAC-seq Pipeline', anchor='atacseq',
super(MultiqcModule, self).__init__(name='The ATAC-seq Pipeline', anchor='atacseq',
href='https://github.com/epigen/atacseq_pipeline',
info="The ATAC-seq pipeline processes and quantifies ATAC-seq data.")
info="processes, quantifies and annotates ATAC-seq data.")
log.info('Initialized atacseq module')

# Parse ATAC-seq stats for each sample
Expand Down

0 comments on commit bdea270

Please sign in to comment.