-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathbedToWig.py
executable file
·94 lines (87 loc) · 2.62 KB
/
bedToWig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#from __future__ import division, with_statement
'''
Copyright 2013, 陈同 ([email protected]).
Please see the license file for legal information.
===========================================================
'''
__author__ = 'chentong & ct586[9]'
__author_email__ = '[email protected]'
#=========================================================
'''
This transfers BED file to variable step WIG file.
Attention:
1.No strand information is considered
2.A known bug for Pair-End data. If paired reads overlapped,
overlapped regions will be counted twice.
Notice:
bdg: zero-based, half-open.
wig: 1-based, no strand specific usually
bed: zero-based, half-open.
'''
import sys
def output(chr, adict, col):
'''
adict={chr:{pos:value}}
'''
print 'variableStep chrom=%s' % chr
posL = adict[chr].keys()
posL.sort(key=lambda x:int(x))
if col == -1:
for i in posL:
print "%s\t%d" % (i, adict[chr][i])
else:
for i in posL:
print "%s\t%f" % (i, adict[chr][i])
#----------------------------------------
#-----------------------------
def main():
if len(sys.argv) < 2:
print >>sys.stderr, "Print the result to screen"
print >>sys.stderr, 'Using python %s bedfile[sort by \
chromosome] innerValue[Default FALSE,which means value is \
1. Accept a number to represent the column[1-based] of \
the innerValue.]' % sys.argv[0]
sys.exit(0)
#-------------------------------
"""
bdg: zero-based, half-open.
wig: 1-based, no strand specific
bed: zero-based, half-open.
"""
col = -1
if len(sys.argv) == 3:
col = int(sys.argv[2]) - 1
#-------------------------------------
chr = ''
adict = {}
for line in open(sys.argv[1]):
lineL = line.split()
if col == -1:
value = 1
else:
value = float(lineL[col])
#----------------------------------
if chr and chr != lineL[0]:
output(chr, adict, col)
adict = {}
chr = ''
if not chr:
chr = lineL[0]
assert( chr not in adict)
adict[chr] = {}
#----------------------------
start = int(lineL[1]) + 1
end = int(lineL[2]) + 1
for i in range(start, end):
if i not in adict[chr]:
adict[chr][i] = value
else:
adict[chr][i] += value
#---------------------------
#---------------------------------
if chr:
output(chr, adict, col)
if __name__ == '__main__':
main()