-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmultiple_soft_elimination.py
59 lines (52 loc) · 2.12 KB
/
multiple_soft_elimination.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from string_utils import get_proper_prefixes_of_list
from KMP import *
from constants import infinity
class MultipleSoftElimination:
def __init__(self, sequence, patterns, cost, alphabet):
self._sequence = sequence
self._patterns = patterns
self._cost = cost #cost class that exposes get method
self._alphabet = alphabet
def eliminate(self):
n = len (self._sequence)
valid_prefixes = get_proper_prefixes_of_list(self._patterns)
A = {i:{prefix:infinity for prefix in valid_prefixes} for i in range(0,n+1)}
traceback = {i:{prefix:(None, None) for prefix in valid_prefixes} for i in range(0,n+1)}
#initialization
A[0][""] = 0.0
#update
kmp_update_function = KmpUpdateFunction(patterns=self._patterns, alphabet=self._alphabet)
for i in range(1,n+1):
for p in valid_prefixes:
min_cost = infinity
min_source = []
for source in kmp_update_function.get_source_set(p):
pref = source[0]
sigma = source[1]
val = A[i-1][pref] + self._cost.get(i, sigma)
if val <= min_cost and val != infinity:
if val == min_cost:
min_source.append(source)
else: #new minimum
min_cost = val
min_source = [source]
A[i][p] = min_cost
traceback[i][p] = min_source #list of pairs (pref, sigma)
#construct the modified sequence
i = n
res_seq = list()
min_cost = infinity
min_prefix = None
for p in valid_prefixes:
prefix_cost = A[i][p]
if prefix_cost < min_cost:
min_cost = prefix_cost
min_prefix = p
if min_prefix is None:
return None
while i > 0 :
pointers = traceback[i][min_prefix][0] #TODO: randomize?
res_seq.insert(0, pointers[1])
min_prefix = pointers[0]
i -= 1
return ''.join(res_seq)