clean code #39

epigen · Apr 25, 2024 · bdea270 · bdea270
1 parent 5964a86
commit bdea270
Show file tree

Hide file tree

Showing 7 changed files with 12 additions and 80 deletions.
diff --git a/workflow/Snakefile b/workflow/Snakefile
@@ -6,12 +6,6 @@ import shutil
 from snakemake.utils import validate, min_version
 from string import Template
 
-# import argparse
-# import json
-# import csv
-# import sys
-# import subprocess
-
 module_name = "atacseq_pipeline"
 
 ##### set minimum snakemake version #####

diff --git a/workflow/envs/multiqc.yaml b/workflow/envs/multiqc.yaml
@@ -10,5 +10,5 @@ dependencies:
   - pytables=3.9.1
   - pip=23.3.1
   - pip:
-#       - 'git+https://github.com/epigen/atacseq_pipeline/#egg=atacseq_report&subdirectory=workflow/scripts/multiqc_atacseq' # works
-    - -e /home/sreichl/projects/atacseq_pipeline/workflow/scripts/multiqc_atacseq # works but only local
+      - 'git+https://github.com/epigen/atacseq_pipeline/#egg=atacseq_report&subdirectory=workflow/scripts/multiqc_atacseq' # works
+#     - -e /home/sreichl/projects/atacseq_pipeline/workflow/scripts/multiqc_atacseq # works but only local
diff --git a/workflow/rules/processing.smk b/workflow/rules/processing.smk
@@ -64,8 +64,6 @@ rule tss_coverage:
         bam = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam"),
         bai = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam.bai"),
     output:
-#         bigWig = os.path.join(result_path, "hub","{sample}.bigWig"),
-#         bigWig_log = os.path.join(result_path, "hub","{sample}.bigWig.log"),
         tss_hist = os.path.join(result_path,"results","{sample}","{sample}.tss_histogram.csv"),
     params:
         # parameters for coverage

diff --git a/workflow/rules/quantification.smk b/workflow/rules/quantification.smk
@@ -163,6 +163,7 @@ rule map_consensus_tss:
         consensus_counts = os.path.join(result_path,"counts","consensus_counts.csv"),
     output:
         tss_counts = os.path.join(result_path,"counts","TSS_counts.csv"),
+        tss_annot = os.path.join(result_path,"counts","TSS_annotation.csv"),
     params:
         # cluster parameters
         partition=config.get("partition"),

diff --git a/workflow/rules/report.smk b/workflow/rules/report.smk
@@ -37,79 +37,11 @@ rule symlink_stats:
         ln -sfn $(realpath --relative-to=$(dirname {output.macs2_log}) {input.macs2_log}) {output.macs2_log}
         ln -sfn $(realpath --relative-to=$(dirname {output.peaks_xls}) {input.peaks_xls}) {output.peaks_xls}
         """
-
-# rule ucsc_hub:
-#     input:
-#         bigwig_files = expand(os.path.join(result_path, "hub", "{sample}.bigWig"), sample=samples.keys()),
-#     output:
-#         bigwig_symlinks = expand(os.path.join(result_path, "hub", config["genome"], "{sample}.bigWig"), sample=samples.keys()),
-#         genomes_file = os.path.join(result_path, "hub", "genomes.txt"),
-#         hub_file = os.path.join(result_path, "hub", "hub.txt"),
-#         trackdb_file = os.path.join(result_path, "hub", config["genome"], "trackDb.txt"),
-#     params:
-#         # cluster parameters
-#         partition=config.get("partition"),
-#     resources:
-#         mem_mb=config.get("mem", "1000"),
-#     threads: config.get("threads", 1)
-#     log:
-#         "logs/rules/ucsc_hub.log"
-#     run:
-#         # create bigwig symlinks
-#         for i in range(len(input.bigwig_files)):
-#             os.symlink(os.path.join('../',os.path.basename(input.bigwig_files[i])), output.bigwig_symlinks[i])
-
-#         # create genomes.txt
-#         with open(output.genomes_file, 'w') as gf:
-#             genomes_text = f'genome {config["genome"]}\ntrackDb {config["genome"]}/trackDb.txt\n'
-#             gf.write(genomes_text)
-
-#         # create hub file
-#         with open(output.hub_file, 'w') as hf:
-#             hub_text = [f'hub {config["project_name"]}',
-#                         f'shortLabel {config["project_name"]}',
-#                         f'longLabel {config["project_name"]}',
-#                         'genomesFile genomes.txt',
-#                         f'email {config["email"]}\n',]
-#             hf.write('\n'.join(hub_text))
-
-#         # create trackdb file
-#         with open(output.trackdb_file, 'w') as tf:
-#             colors = ['166,206,227', '31,120,180', '51,160,44', '251,154,153', '227,26,28',
-#                               '253,191,111', '255,127,0', '202,178,214', '106,61,154', '177,89,40']
-
-#             track_db = ['track {}'.format(config["project_name"]),
-#                         'type bigWig', 'compositeTrack on', 'autoScale on', 'maxHeightPixels 32:32:8',
-#                         'shortLabel {}'.format(config["project_name"][:8]),
-#                         'longLabel {}'.format(config["project_name"]),
-#                         'visibility full',
-#                         '', '']
-#             for sample_name in samples.keys():
-#                 track_color = '255,40,0'
-
-#                 if config["annot_columns"][0]!="":
-#                     color_hash = hash(samples[sample_name][config["annot_columns"][0]])
-#                     track_color = colors[color_hash % len(colors)]
-
-#                 track = ['track {}'.format(sample_name),
-#                          'shortLabel {}'.format(sample_name),
-#                          'longLabel {}'.format(sample_name),
-#                          'bigDataUrl {}.bigWig'.format(sample_name),
-#                          'parent {} on'.format(config["project_name"]),
-#                          'type bigWig', 'windowingFunction mean',
-#                          'color {}'.format(track_color),
-#                          '', '']
-
-#                 track_db += track
-
-#             tf.write('\n'.join(track_db))
 
 rule multiqc:
     input:
         expand(os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam"), sample=samples.keys()),
         expand(os.path.join(result_path,"results","{sample}","peaks","{sample}_peaks.narrowPeak"), sample=samples.keys()),
-#         expand(os.path.join(result_path, "hub","{sample}.bigWig"),sample=samples.keys()),
-#         trackdb_file = os.path.join(result_path, "hub", config["genome"], "trackDb.txt"), # representing UCSC hub
         expand(os.path.join(result_path, 'report', '{sample}_peaks.xls'), sample=samples.keys()), # representing symlinked stats
         sample_annotation = config["annotation"],
     output:

diff --git a/workflow/scripts/map_consensus_tss.py b/workflow/scripts/map_consensus_tss.py
@@ -19,6 +19,7 @@ def map_region(x):
 
 # output
 tss_counts_path = snakemake.output["tss_counts"]
+tss_annot_path = snakemake.output["tss_annot"]
 
 # parameters
 TSS_up = -snakemake.config["proximal_size_up"]
@@ -36,3 +37,9 @@ def map_region(x):
 TSS_counts = consensus_counts.loc[TSS_regions["peak_id"],:]
 TSS_counts.index = TSS_regions.index
 TSS_counts.to_csv(tss_counts_path)
+
+# subset the consensus annotation by the successfully mapped consensus regions, rename index to genes and save
+TSS_annot = annot_regions.loc[TSS_regions["peak_id"],:]
+TSS_annot.reset_index(inplace=True)
+TSS_annot.index = TSS_regions.index
+TSS_annot.to_csv(tss_annot_path)
diff --git a/workflow/scripts/multiqc_atacseq/atacseq_report/modules/atacseq/atacseq.py b/workflow/scripts/multiqc_atacseq/atacseq_report/modules/atacseq/atacseq.py
@@ -31,9 +31,9 @@ def __init__(self):
             return None
 
         # Initialise the parent object
-        super(MultiqcModule, self).__init__(name='ATAC-seq Pipeline', anchor='atacseq',
+        super(MultiqcModule, self).__init__(name='The ATAC-seq Pipeline', anchor='atacseq',
                                             href='https://github.com/epigen/atacseq_pipeline',
-                                            info="The ATAC-seq pipeline processes and quantifies ATAC-seq data.")
+                                            info="processes, quantifies and annotates ATAC-seq data.")
         log.info('Initialized atacseq module')
 
         # Parse ATAC-seq stats for each sample