-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpipeline_cellranger.py
127 lines (87 loc) · 2.74 KB
/
pipeline_cellranger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""===========================
Pipeline template
===========================
.. Replace the documentation below with your own description of the
pipeline's purpose
Overview
========
This pipeline computes the word frequencies in the configuration
files :file:`pipeline.yml` and :file:`conf.py`.
Usage
=====
See :ref:`PipelineSettingUp` and :ref:`PipelineRunning` on general
information how to use cgat pipelines.
Configuration
-------------
The pipeline requires a configured :file:`pipeline.yml` file.
cgatReport report requires a :file:`conf.py` and optionally a
:file:`cgatreport.yml` file (see :ref:`PipelineReporting`).
Default configuration files can be generated by executing:
python <srcdir>/pipeline_@template@.py config
Input files
-----------
None required except the pipeline configuration files.
Requirements
------------
The pipeline requires the results from
:doc:`pipeline_annotations`. Set the configuration variable
:py:data:`annotations_database` and :py:data:`annotations_dir`.
Pipeline output
===============
.. Describe output files of the pipeline here
Glossary
========
.. glossary::
Code
====
"""
from ruffus import *
from cgatcore import pipeline as P
import sys
import os
import re
import pandas as pd
# Pipeline configuration, merged from up to three locations.
# NOTE(review): presumably entries later in the list override earlier
# ones — confirm against cgatcore.pipeline.get_parameters semantics.
PARAMS = P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"])

# prints date and time, e.g. '2020-07-14T10:03:08'
# (a shell command string, executed inside job statements below)
DATETIME = "date +'%Y-%m-%dT%H:%M:%S'"

# Per-sample metadata table, indexed by sample 'name'.
# Columns read by cellranger_count: 'fastqs', 'cells', 'chemistry'.
SAMPLES = pd.read_csv("samples.csv")
SAMPLES.set_index('name', inplace=True)
@follows(mkdir("count"))
@transform("data/*/.sample",
           regex(r"data/([A-Za-z0-9_]*)/.sample"),
           r"count/\1.done")
def cellranger_count(infile, outfile):
    '''Run ``cellranger count`` for one sample.

    One task per ``data/<sample>/.sample`` sentinel file; the sample name
    is the directory component captured by the ``@transform`` regex.
    Per-sample settings (fastq directory, expected cell count, chemistry)
    come from the module-level SAMPLES table; the transcriptome path and
    resource limits come from PARAMS. On success the cellranger output
    directory is moved under ``count/`` and ``count/<sample>.done`` is
    touched; wall-clock start/end timestamps go to ``count/<sample>.time``.
    '''
    # Re-derive the sample name from infile (same pattern as @transform).
    sample = re.search('data/([A-Za-z0-9_]*)/.sample', infile).group(1)
    # Per-sample columns from samples.csv (indexed by 'name').
    fastqs = SAMPLES['fastqs'][sample]
    cells = SAMPLES['cells'][sample]
    chemistry = SAMPLES['chemistry'][sample]
    transcriptome = PARAMS["transcriptome"]
    # Bound as a local so %(datetime)s resolves in the statement below.
    datetime = DATETIME
    # NOTE(review): job_threads / job_memory are magic local names that
    # cgatcore's P.run() reads to configure cluster submission — confirm;
    # do not rename them.
    job_threads = PARAMS["cellranger"]["count"]["threads"]
    job_memory = PARAMS["cellranger"]["count"]["memory"]
    # Total memory for --localmem: per-thread memory x threads.
    # Assumes job_memory is formatted like '8G' — TODO confirm.
    local_memory = int(job_memory.replace("G", "")) * job_threads
    # %(name)s placeholders are interpolated from local variables by
    # P.run(); cellranger writes into ./<sample>, which is then moved
    # under count/ before touching the .done sentinel.
    statement = """
    %(datetime)s > count/%(sample)s.time &&
    cellranger count
        --id %(sample)s
        --transcriptome %(transcriptome)s
        --fastqs %(fastqs)s
        --expect-cells %(cells)s
        --chemistry %(chemistry)s
        --localcores %(job_threads)s
        --localmem %(local_memory)s &&
    mv %(sample)s count/ &&
    touch %(outfile)s &&
    %(datetime)s >> count/%(sample)s.time
    """
    P.run(statement)
def main(argv=None):
    """Command-line entry point: hand *argv* to the cgat-core runner.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; defaults to ``sys.argv`` when None.

    Returns
    -------
    Whatever :func:`cgatcore.pipeline.main` returns, propagated so the
    process exit status reflects the pipeline outcome.
    """
    if argv is None:
        argv = sys.argv
    # Return (not just call) so sys.exit() below sees the real status.
    return P.main(argv)


if __name__ == "__main__":
    # Previously this bypassed main() and called P.main(sys.argv)
    # directly, leaving main() as dead code; route through main().
    sys.exit(main())