forked from zehavitc/EliminatingDNAPatterns
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdna_utils.py
38 lines (30 loc) · 1.17 KB
/
dna_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from itertools import product
from constants import dna_iupac_dict
def extend_ambiguous_dna(seq):
    """
    Expand an ambiguous DNA sequence into every concrete sequence it can stand for.

    Each letter of ``seq`` is looked up in ``dna_iupac_dict`` and the cartesian
    product of the per-letter alternatives is joined back into strings, so the
    size of the result is the product of the number of alternatives per letter.
    Based on https://www.bioinformatics.org/sms/iupac.html
    Gaps (. or -) are not supported.

    :param seq: a DNA sequence, possibly with IUPAC codes (upper cased)
    :return: list of all possible sequences given the ambiguous input ``seq``;
        an empty ``seq`` yields ``[""]``
    :raises ValueError: if ``seq`` contains a letter that has no entry in
        ``dna_iupac_dict``
    """
    # NOTE(review): presumably dna_iupac_dict maps each IUPAC letter to an
    # iterable of concrete bases - confirm against constants.py.
    alternatives = []
    for position, letter in enumerate(seq):
        choices = dna_iupac_dict.get(letter)
        if choices is None:
            # Without this check product() fails with an opaque
            # "'NoneType' object is not iterable" TypeError.
            raise ValueError(
                "unsupported DNA letter {!r} at position {} in sequence {!r}".format(
                    letter, position, seq
                )
            )
        alternatives.append(choices)
    return ["".join(combination) for combination in product(*alternatives)]
def is_purine(letter):
    """
    Tell whether a single base letter is a purine.

    :param letter: a string; compared case-insensitively
    :return: True if ``letter`` is "a" or "g" in either case, False otherwise
    """
    # Tuple membership (not substring search) so that "" and "ag" stay False.
    return letter.lower() in ("a", "g")
def is_pyrimidine(letter):
    """
    Tell whether a single base letter is a pyrimidine.

    :param letter: a string; compared case-insensitively
    :return: True if ``letter`` is "c" or "t" in either case, False otherwise
    """
    # Tuple membership (not substring search) so that "" and "ct" stay False.
    return letter.lower() in ("c", "t")
def is_transition(old, new):
    """
    Tell whether replacing ``old`` with ``new`` stays within one base class.

    :param old: the original base letter
    :param new: the replacement base letter
    :return: True if both letters are purines or both are pyrimidines
        (identical letters therefore count as a transition); False otherwise,
        including when ``old`` is neither a purine nor a pyrimidine
    """
    if is_purine(old):
        return is_purine(new)
    if is_pyrimidine(old):
        return is_pyrimidine(new)
    return False
def is_transversion(old, new):
    """
    Tell whether replacing ``old`` with ``new`` is not a transition.

    This is the plain negation of ``is_transition``, so any pair that
    ``is_transition`` rejects is reported as a transversion.

    :param old: the original base letter
    :param new: the replacement base letter
    :return: True if ``is_transition(old, new)`` is False, otherwise False
    """
    same_class = is_transition(old, new)
    return not same_class
def contains_transversion(list):
    """
    Tell whether a collection of letters mixes pyrimidines with other letters.

    Every letter that ``is_pyrimidine`` rejects is counted as a purine; no
    separate purine check is made.

    :param list: an iterable of base letters (traversed once)
    :return: True if at least one letter is a pyrimidine and at least one is
        not; False otherwise, including for an empty input
    """
    # Collect the distinct classification results seen in a single pass:
    # True for a pyrimidine, False for anything else.
    classes_seen = {is_pyrimidine(letter) for letter in list}
    return True in classes_seen and False in classes_seen