-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathreverse_complement.py
56 lines (48 loc) · 1.86 KB
/
reverse_complement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Default base/complement table, reduced to the four bases A, T, G and C.
# Layout: the first line is a '#' comment, the second line is the column
# header, and every following row pairs a base with its complementary base.
# The functions in this module skip the first two lines when reading a table.
# See tests for a more comprehensive complementary table
SIMPLE_COMPLEMENTS_STR = """#Reduced table with bases A, G, C, T
Base Complementary Base
A T
T A
G C
C G
"""
# Recommended helper function
def _clean_sequence(sequence, str_table):
"""
Receives a DNA sequence and a str_table that defines valid (and
complementary) bases
Returns all sequences converted to upper case and remove invalid
characters
t!t%ttttAACCG --> TTTTTTAACCG
"""
filter_list = [s[1] for s in str_table.splitlines()[2:]]
return "".join(filter(lambda c: c in filter_list, sequence.upper()))
def reverse(sequence, str_table=SIMPLE_COMPLEMENTS_STR):
    """
    Clean a DNA sequence and return it back to front.

    sequence: raw sequence text.
    str_table: table defining the valid (and complementary) bases; it is
    passed unchanged to _clean_sequence.

    Returns the cleaned sequence (upper case, characters not found in
    str_table removed) in reverse order.
    e.g. t!t%ttttAACCG --> GCCAATTTTTT
    """
    cleaned = _clean_sequence(sequence, str_table)
    return "".join(reversed(cleaned))
def complement(sequence, str_table=SIMPLE_COMPLEMENTS_STR):
    """
    Translate a DNA sequence into its complementary bases.

    sequence: raw sequence text; it is upper-cased before translation.
    str_table: text table whose first two lines (comment and column
    header) are skipped. On every remaining non-blank row the first
    non-whitespace character is the base and the last non-whitespace
    character is its complement.

    Returns a string holding the complement of every valid base, in the
    original order; characters that are not bases in str_table are dropped.
    e.g. t!t%ttttAACCG --> AAAAAATTGGC
    """
    # Parse the table once. Rows are stripped before indexing so neither
    # leading nor trailing whitespace can be mistaken for a base, and blank
    # rows are skipped instead of raising IndexError.
    complements = {}
    for row in str_table.splitlines()[2:]:
        cells = row.strip()
        if cells:
            complements[cells[0]] = cells[-1]
    # The same mapping both filters out invalid characters and translates
    # the valid ones, so the table does not have to be parsed twice.
    return "".join(
        complements[base] for base in sequence.upper() if base in complements
    )
def reverse_complement(sequence, str_table=SIMPLE_COMPLEMENTS_STR):
    """
    Return the reverse complement of a DNA sequence.

    sequence: raw sequence text.
    str_table: table defining the valid (and complementary) bases; it is
    passed unchanged to complement.

    Returns the output of complement(sequence, str_table) read back to
    front.
    e.g. t!t%ttttAACCG --> CGGTTAAAAAA
    """
    complemented = complement(sequence, str_table)
    return "".join(reversed(complemented))