-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcsv2plink.py
executable file
·103 lines (79 loc) · 2.14 KB
/
csv2plink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
import sys
import getopt
import csv
def csv2pedmap(csvin, pedout, mapout, chromosome, phenotype):
    """Convert a CSV with SNPs as rows and subjects as columns to PLINK
    PED/MAP files.

    The first CSV row is a header: its first cell is ignored and the
    remaining cells are the subject identifiers.  Every following row is a
    SNP identifier followed by one genotype per subject.  Blank lines are
    skipped.  Nothing is written when the input has no header or no SNP
    rows.

    Parameters:
    csvin - Readable filehandle (CSV source)
    pedout - Writable filehandle (PED target)
    mapout - Writable filehandle (MAP target)
    chromosome - Chromosome label written on every MAP line
    phenotype - Phenotype value written on every PED line
    """
    reader = csv.reader(csvin)

    # next(reader, None) runs on both Python 2.6+ and 3 (reader.next() is
    # Python 2 only) and turns an empty input into a no-op instead of a
    # StopIteration escaping to the caller.
    header = next(reader, None)
    if header is None:
        return
    subjects = header[1:]

    # csv.reader yields [] for blank lines; one such row would make the
    # transpose below come out empty, so drop them up front.
    snp_rows = [row for row in reader if row]
    if not snp_rows:
        return

    # Transpose so that each tuple is one CSV column: the first holds the
    # SNP identifiers, the others hold one subject's genotypes each.
    # list() is required because zip() returns an iterator on Python 3.
    # Note that zip() stops at the shortest row, so ragged rows truncate.
    transposed = list(zip(*snp_rows))
    snps = transposed[0]
    genotype_rows = transposed[1:]

    writeMAP(mapout, chromosome, snps)
    writePED(pedout, subjects, phenotype, genotype_rows)
def writeMAP(mapout, chromosome, snps):
    """Write one MAP line per SNP to mapout.

    Each line has three tab-separated fields: the chromosome label, the
    SNP identifier and the constant 1.

    Parameters:
    mapout - Writable filehandle (MAP target)
    chromosome - Chromosome label; a numeric string is normalised through
                 int() (so "01" is written as "1"), anything that is not
                 an integer literal is written unchanged
    snps - Iterable of SNP identifiers, one output line each
    """
    try:
        chrom_label = str(int(chromosome))
    except ValueError:
        # Non-numeric labels such as "X" or "MT" are kept verbatim.
        chrom_label = chromosome

    # An explicit loop is used instead of map(): map() is lazy on Python 3,
    # so using it for its side effect would write nothing.  The chromosome
    # is passed as a format argument rather than spliced into the format
    # string, so a '%' in the label cannot corrupt the output.
    for snp in snps:
        mapout.write("%s\t%s\t1\n" % (chrom_label, snp))
def writePED(pedout, subjects, phenotype, rows):
    """Write one PED line per subject to pedout.

    Each line is tab-separated: the subject identifier, the phenotype, and
    then one field per genotype.  Within a genotype field the items of the
    genotype are joined with single spaces, so the string "AG" is written
    as "A G".

    Parameters:
    pedout - Writable filehandle (PED target)
    subjects - Sequence of subject identifiers
    phenotype - Phenotype value written on every line
    rows - Sequence of per-subject genotype sequences, in the same order
           as subjects; pairing stops at the shorter of the two
    """
    for subj, genotypes in zip(subjects, rows):
        fields = "\t".join(" ".join(g) for g in genotypes)
        # The phenotype is passed as a format argument rather than spliced
        # into the format string, so a '%' in it cannot break formatting.
        pedout.write("%s\t%s\t%s\n" % (subj, phenotype, fields))
def main(argv):
    """Command-line entry point: parse the options and run the conversion.

    Parameters:
    argv - Argument list laid out like sys.argv (program name first).
           The list is not modified.

    Returns the process exit status: 0 after printing the help text or
    after a completed conversion, 1 on any usage error.
    """
    # Index/slice instead of pop() so the caller's list (normally sys.argv)
    # is left untouched.
    cmd = argv[0].split('/')[-1]
    usage = """Usage: %s OPTIONS [INPUT] OUTPUT
Converts from CSV format (SNP,GENOTYPE1,GENOTYPE2,...) to PLINK MAP/PED file.
Example:
%s -c 1 -p 0 input.csv output
produces output.map, output.ped
""" % (cmd, cmd)

    try:
        opts, args = getopt.getopt(
            argv[1:], "c:p:h", ["chromosome=", "phenotype=", "help"])
    except getopt.GetoptError as msg:
        print(msg)
        print(usage)
        return 1

    chrom = ""
    phen = ""
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            return 0
        elif opt in ("-c", "--chromosome"):
            chrom = arg
        elif opt in ("-p", "--phenotype"):
            phen = arg
        else:
            print("Unrecognized option %s %s" % (opt, arg))
            print(usage)
            return 1

    if not chrom or not phen:
        print("Chromosome and phenotype options are required")
        print(usage)
        return 1

    if not args:
        print(usage)
        return 1

    # The last positional argument is the output prefix; when another one
    # precedes it, the first positional argument is the input CSV.
    # Otherwise the CSV is read from standard input.
    out = args[-1]
    if len(args) > 1:
        infile = open(args[0], 'r')
    else:
        infile = sys.stdin

    try:
        # Context managers guarantee both output files are closed even if
        # the conversion raises part-way through.
        with open("%s.ped" % out, 'w') as pedfile:
            with open("%s.map" % out, 'w') as mapfile:
                csv2pedmap(infile, pedfile, mapfile, chrom, phen)
    finally:
        # Only close what this function opened; leave sys.stdin usable.
        if infile is not sys.stdin:
            infile.close()
    return 0
if __name__ == "__main__":
    # Executed as a script: hand the raw command line to main() and use
    # its return value as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)