-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathfpkm_tracking.py
executable file
·50 lines (40 loc) · 1.78 KB
/
fpkm_tracking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
from optparse import OptionParser
import pandas as pd
################################################################################
# fpkm_tracking
#
# Print a table of FPKM abundance estimates with one gene/sample per row.
#
# Using Pandas for this is stupid because we have to read the whole thing
# into memory, and when you iterate over rows, it has to create a Series object.
################################################################################
################################################################################
# main
################################################################################
def main():
    """Print FPKM estimates from an .fpkm_tracking table, one per line.

    Reads the tab-separated file named by the single positional
    command-line argument. For every row, and for every column whose
    name ends in '_FPKM', writes ``tracking_id<TAB>sample<TAB>fpkm`` to
    stdout when the status value located three columns after the FPKM
    column equals 'OK'. The sample name is the column name with the
    '_FPKM' suffix removed.

    Options:
        -g GENE_ID  Only report rows whose 'gene_id' value equals GENE_ID.

    Exits via ``parser.error`` unless exactly one positional argument
    is supplied.
    """
    usage = 'usage: %prog [options] <fpkm_tracking>'
    parser = OptionParser(usage)
    parser.add_option('-g', dest='gene_id', help='This gene only')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        # parser.error() prints the message and exits the process.
        parser.error('Must provide .fpkm_tracking file')
    fpkm_tracking_file = args[0]

    cuff = pd.read_csv(fpkm_tracking_file, sep='\t')

    # Resolve (sample, FPKM position, status position) once instead of
    # once per row.
    # NOTE(review): assumes each <sample>_FPKM column is followed by two
    # other columns and then that sample's status column -- confirm
    # against the producer of the file.
    suffix = '_FPKM'
    sample_columns = [
        (name[:-len(suffix)], i, i + 3)
        for i, name in enumerate(cuff.columns)
        if name.endswith(suffix)
    ]

    for _, gene_series in cuff.iterrows():
        gene_id = gene_series['gene_id']
        # Compare as text: a purely numeric gene_id column is parsed as
        # numbers and would otherwise never equal the option string.
        if options.gene_id is not None and str(gene_id) != options.gene_id:
            continue

        for sample, fpkm_i, status_i in sample_columns:
            # .iloc makes the positional lookup explicit; plain integer
            # keys on a label-indexed Series are deprecated in pandas.
            if gene_series.iloc[status_i] != 'OK':
                continue
            cols = [
                str(gene_series['tracking_id']),
                sample,
                str(gene_series.iloc[fpkm_i]),
            ]
            # Parenthesised single argument works on Python 2 and 3.
            print('\t'.join(cols))
################################################################################
# __main__
################################################################################
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()