forked from ashutoshkpandey/SimplePrograms
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExtract_private.py
58 lines (34 loc) · 1017 Bytes
/
Extract_private.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
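## This program extracts the variants private to a given strain from the Sanger 18-strain VCF file (MGP).
## Usage: python Extract_private.py <input.vcf> <strain_column_index> <output.vcf>
##   <strain_column_index> is the 0-based index of the strain's column in each tab-separated row.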
import re,sys,fileinput
# Layout of a data row and the genotype strings the filter looks for.
FIRST_SAMPLE_COLUMN = 9   # per-strain genotype columns start at this index
OTHER_STRAIN_COUNT = 17   # 18 strains in the file minus the strain of interest
HOMOZYGOUS_ALT = "1/1"
HOMOZYGOUS_REF = "0/0"
MASKED = "NA"             # placeholder that hides the target strain from the count


def is_private_variant(line, strain_column, other_strain_count=OTHER_STRAIN_COUNT):
    """Return True when *line* holds a variant private to one strain.

    A row qualifies when both conditions hold:

    * the genotype entry in ``strain_column`` starts with ``1/1`` and its
      last colon-separated field is ``"1"`` (presumably a per-sample
      confidence/filter flag -- confirm against the file's FORMAT header);
    * exactly ``other_strain_count`` of the remaining sample columns
      (index 9 onwards) have the genotype ``0/0``.

    Parameters:
        line: one tab-separated data row, with or without its line ending.
        strain_column: 0-based index of the strain's column in the row.
        other_strain_count: number of other strains that must be ``0/0``.

    Raises:
        IndexError: if the row has no column at ``strain_column``.
    """
    # Strip the terminator before splitting; otherwise the last sample column
    # keeps its "\n" and a strain stored there could never match "1".
    fields = line.rstrip("\r\n").split("\t")

    target = fields[strain_column].split(":")
    if target[0] != HOMOZYGOUS_ALT or target[-1] != "1":
        return False

    # Mask the target so it is not compared against itself below.
    fields[strain_column] = MASKED
    reference_calls = sum(
        1
        for sample in fields[FIRST_SAMPLE_COLUMN:]
        if sample.split(":")[0] == HOMOZYGOUS_REF
    )
    return reference_calls == other_strain_count


def extract_private(filepath, strain_column, outpath,
                    other_strain_count=OTHER_STRAIN_COUNT):
    """Copy header lines and private-variant rows from *filepath* to *outpath*.

    Lines starting with ``#`` are copied verbatim; blank lines are skipped;
    every other line is written only if ``is_private_variant`` accepts it.
    Both files are closed even if processing fails part-way through.
    """
    with open(outpath, "w") as out, fileinput.input([filepath]) as source:
        for line in source:
            if line.startswith("#"):
                out.write(line)
            elif line.strip() and is_private_variant(
                line, strain_column, other_strain_count
            ):
                out.write(line)


def main(argv=None):
    """Command-line entry point: ``<input.vcf> <strain_column> <output.vcf>``."""
    args = sys.argv[1:] if argv is None else list(argv)
    usage = "usage: Extract_private.py <input.vcf> <strain_column> <output.vcf>"
    if len(args) < 3:
        sys.exit(usage)
    try:
        strain_column = int(args[1])
    except ValueError:
        sys.exit("strain_column must be an integer\n" + usage)
    extract_private(args[0], strain_column, args[2])


if __name__ == "__main__":
    main()