-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtasker.py
176 lines (145 loc) · 8.18 KB
/
tasker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import numpy as np
class Tasker:
    """Collection of small population-genetics simulations (Wright-Fisher style).

    Every ``task_*`` method prints a short banner, runs one stochastic
    simulation driven by NumPy's global random state (``np.random``) and
    returns the recorded trajectory. Populations are haploid: ``N`` is the
    number of allele copies.
    """

    def __init__(self):
        pass

    # ------------------------------------------------------------------
    # Private helpers shared by several tasks
    # ------------------------------------------------------------------
    @staticmethod
    def _resample(population):
        """Return the next generation: each offspring copies a random parent."""
        return np.random.choice(population, size=population.size, replace=True)

    @staticmethod
    def _resample_with_fitness(population, fitness):
        """Return the next generation, picking parents proportionally to fitness.

        ``fitness`` maps an allele label to its relative fitness.
        """
        weights = np.zeros(population.size)
        for allele, value in fitness.items():
            weights[population == allele] = value
        return np.random.choice(
            population, size=population.size, p=weights / np.sum(weights)
        )

    @staticmethod
    def _split_subpopulations(population, subpops):
        """Split ``population`` into ``subpops`` equally sized, independent arrays.

        Individuals beyond ``subpops * (len(population) // subpops)`` are
        dropped. Returns ``(list_of_arrays, subpop_size)``.
        """
        if subpops < 1 or population.size < subpops:
            raise ValueError(
                "need 1 <= subpops <= N so every sub-population is non-empty"
            )
        subpop_size = population.size // subpops
        # Copies (not views) so later in-place writes never alias each other.
        parts = [
            population[i * subpop_size:(i + 1) * subpop_size].copy()
            for i in range(subpops)
        ]
        return parts, subpop_size

    # ------------------------------------------------------------------
    # Tasks
    # ------------------------------------------------------------------
    def task_1(self, p, N):
        """Simulate neutral genetic drift of allele 'A' versus 'B'.

        Parameters
        ----------
        p : float
            Probability that an individual of the initial population is 'A'.
        N : int
            Population size.

        Returns
        -------
        list
            Frequency of 'A' per generation. At most ``2 * N`` entries; the
            list ends early at the first generation where 'A' is fixed (1)
            or lost (0).
        """
        print('Task 1: Genetic drift')
        population = np.random.choice(['A', 'B'], size=N, p=[p, 1 - p])
        allele_freqs = []
        for _ in range(2 * N):
            freq_a = np.sum(population == 'A') / N
            allele_freqs.append(freq_a)
            # Once fixed or lost nothing can change any more; stop before
            # drawing a generation that would never be recorded.
            if freq_a == 1 or freq_a == 0:
                break
            population = self._resample(population)
        return allele_freqs

    def task_2_identicalByDescent(self, N):
        """Run drift on ``N`` initially distinct alleles until one remains.

        Parameters
        ----------
        N : int
            Population size; individual ``i`` starts with the unique label ``i``.

        Returns
        -------
        tuple
            ``(generations, freq)`` where ``generations`` is the number of
            resampling steps performed and ``freq`` holds the frequency of
            the most common allele, starting with ``1 / N``, followed by one
            entry per generation.
        """
        print('Task 2: Coalescent model + identical by descent')
        population = np.arange(N)
        generations = 0
        freq = [1 / N]  # every allele is unique at the start
        distinct = N
        while distinct > 1:
            generations += 1
            population = self._resample(population)
            _, counts = np.unique(population, return_counts=True)
            distinct = counts.size
            freq.append(np.max(counts) / N)
        return generations, freq

    def task_2_firstCoalescentEvent(self, n, N):
        """Count generations until the first coalescent event in a sample.

        Each generation a coalescence happens with probability
        ``n * (n - 1) / (2 * N)``.

        Parameters
        ----------
        n : int
            Sample size (number of lineages).
        N : int
            Population size.

        Returns
        -------
        int
            Number of generations (>= 1) until the first coalescence.

        Raises
        ------
        ValueError
            If the per-generation coalescence probability is not positive
            (for example ``n < 2``); the waiting loop would never terminate.
        """
        print('Task 2: First coalescent event + sample size')
        coalescent_proba = n * (n - 1) / (2 * N)
        if coalescent_proba <= 0:
            raise ValueError(
                "coalescence probability must be positive (need n >= 2 and N > 0)"
            )
        generations = 0
        while True:
            generations += 1
            if np.random.rand() < coalescent_proba:
                return generations

    def task_2_coalescentModel(self, n, N):
        """Count generations until ``n`` sampled lineages coalesce into one.

        While ``k > 1`` lineages remain, each generation merges two of them
        with probability ``k * (k - 1) / (2 * N)``.

        Parameters
        ----------
        n : int
            Sample size (number of lineages).
        N : int
            Population size.

        Returns
        -------
        int
            Total number of generations; 0 when ``n <= 1``.

        Raises
        ------
        ValueError
            If ``n > 1`` and ``N <= 0``; the probability would never be
            positive and the loop would not terminate.
        """
        print('Task 2: Coalescent model + sample size')
        if n > 1 and N <= 0:
            raise ValueError("N must be positive")
        generations = 0
        while n > 1:
            generations += 1
            coalescent_proba = n * (n - 1) / (2 * N)
            if np.random.rand() < coalescent_proba:
                n -= 1
        return generations

    def task_3(self, N, mutation_rate, generations):
        """Simulate drift with mutation under the infinite-allele model.

        The population starts with ``N`` identical alleles. Each generation
        every offspring copies a random parent and then, with probability
        ``mutation_rate``, mutates to an allele label that has never been
        used before.

        Parameters
        ----------
        N : int
            Population size.
        mutation_rate : float
            Per-individual, per-generation mutation probability ``u``.
        generations : int
            Number of generations to simulate.

        Returns
        -------
        list
            One value per generation, computed before reproduction as
            ``(1 - u)**2 * (1/N + (1 - 1/N) * sum(freq**2))`` where ``freq``
            are the current allele frequencies.
        """
        print('Task 3: Mutations in the infinite-allele model')
        population = np.zeros(N, dtype=int)  # homogeneous start: allele 0
        next_allele = 1  # smallest label not yet used by any allele
        # Probability that neither of two copies mutated: (1 - u)^2.
        no_mutation = (1 - mutation_rate) ** 2
        fixation_index = []
        for _ in range(generations):
            _, counts = np.unique(population, return_counts=True)
            freq = counts / N
            homozygosity = np.sum(freq ** 2)
            fixation_index.append(
                no_mutation * ((1 / N) + (1 - (1 / N)) * homozygosity)
            )
            offspring = np.random.choice(population, size=N)
            mutated = np.random.rand(N) < mutation_rate
            num_new = int(np.sum(mutated))
            if num_new:
                # Infinite-allele model: every mutation creates a new allele,
                # so hand out consecutive labels that were never used.
                offspring[mutated] = np.arange(next_allele, next_allele + num_new)
                next_allele += num_new
            population = offspring
        return fixation_index

    def task_4(self, N, p, s, generations):
        """Simulate selection between allele 'A' (fitness 1) and 'B' (1 + s).

        Parameters
        ----------
        N : int
            Population size.
        p : float
            Probability that an individual of the initial population is 'A'.
        s : float
            Selection coefficient of 'B'.
        generations : int
            Number of generations to simulate.

        Returns
        -------
        list
            Frequency of 'B' after each generation.
        """
        print('Task 4: Selection')
        population = np.random.choice(['A', 'B'], size=N, p=[p, 1 - p])
        fitness = {'A': 1, 'B': 1 + s}
        freq_B = []
        for _ in range(generations):
            population = self._resample_with_fitness(population, fitness)
            freq_B.append(np.sum(population == 'B') / N)
        return freq_B

    def task_5(self, N, generations, pA=0.79, pB=0.2, pC=0.01):
        """Simulate three competing clones with fitness 1, 1.05 and 1.1.

        Parameters
        ----------
        N : int
            Population size.
        generations : int
            Number of generations to simulate.
        pA, pB, pC : float
            Initial probabilities of alleles 'A', 'B' and 'C'.

        Returns
        -------
        tuple
            ``(freq_A, freq_B, freq_C)``: per-generation frequency lists.
        """
        print('Task 5: Clonal inference')
        population = np.random.choice(['A', 'B', 'C'], size=N, p=[pA, pB, pC])
        fitness = {'A': 1, 'B': 1.05, 'C': 1.1}
        freq_A = []
        freq_B = []
        freq_C = []
        for _ in range(generations):
            population = self._resample_with_fitness(population, fitness)
            freq_A.append(np.sum(population == 'A') / N)
            freq_B.append(np.sum(population == 'B') / N)
            freq_C.append(np.sum(population == 'C') / N)
        return freq_A, freq_B, freq_C

    def task_6(self, p, N, generations, subpops=10):
        """Simulate drift in fully isolated, equally sized sub-populations.

        Parents are drawn only from the same sub-population; there is no
        mutation, selection or migration.

        Parameters
        ----------
        p : float
            Probability that an individual of the initial population is 'A'.
        N : int
            Total population size; each sub-population has ``N // subpops``
            individuals.
        generations : int
            Number of generations to simulate.
        subpops : int, optional
            Number of sub-populations.

        Returns
        -------
        list
            One list per sub-population with the frequency of 'A' recorded
            at the start of every generation.

        Raises
        ------
        ValueError
            If ``subpops < 1`` or ``N < subpops``.
        """
        print('Task 6: Population structure')
        population = np.random.choice(['A', 'B'], size=N, p=[p, 1 - p])
        sub_pops, subpop_size = self._split_subpopulations(population, subpops)
        allele_freqs = [[] for _ in range(subpops)]
        for _ in range(generations):
            for i in range(subpops):
                allele_freqs[i].append(np.sum(sub_pops[i] == 'A') / subpop_size)
                sub_pops[i] = self._resample(sub_pops[i])
        return allele_freqs

    def task_7(self, p, N, generations, migration_rate, subpops=10):
        """Simulate drift in sub-populations that exchange migrants.

        Each generation every sub-population first drifts on its own. Then
        each sub-population swaps ``int(migration_rate * subpop_size)``
        randomly chosen individuals with one randomly chosen other
        sub-population, so all sub-populations keep their size.

        Parameters
        ----------
        p : float
            Probability that an individual of the initial population is 'A'.
        N : int
            Total population size; each sub-population has ``N // subpops``
            individuals.
        generations : int
            Number of generations to simulate.
        migration_rate : float
            Fraction of a sub-population exchanged per generation, in [0, 1].
        subpops : int, optional
            Number of sub-populations.

        Returns
        -------
        list
            One list per sub-population with the frequency of 'A' recorded
            at the start of every generation.

        Raises
        ------
        ValueError
            If ``migration_rate`` is outside [0, 1], ``subpops < 1`` or
            ``N < subpops``.
        """
        print('Task 7: Migration')
        if not 0 <= migration_rate <= 1:
            raise ValueError("migration_rate must be within [0, 1]")
        population = np.random.choice(['A', 'B'], size=N, p=[p, 1 - p])
        sub_pops, subpop_size = self._split_subpopulations(population, subpops)
        num_migrants = int(migration_rate * subpop_size)
        # No exchange is possible without migrants or without a partner.
        # (A slice like [-0:] would otherwise address the whole array.)
        can_migrate = num_migrants > 0 and subpops > 1
        allele_freqs = [[] for _ in range(subpops)]
        for _ in range(generations):
            for i in range(subpops):
                allele_freqs[i].append(np.sum(sub_pops[i] == 'A') / subpop_size)
                sub_pops[i] = self._resample(sub_pops[i])
            if not can_migrate:
                continue
            for i in range(subpops):
                target = np.random.choice([j for j in range(subpops) if j != i])
                # Pick positions, not values, so the individuals that leave
                # are really replaced by the ones that arrive (a true swap).
                leaving = np.random.choice(
                    subpop_size, size=num_migrants, replace=False
                )
                arriving = np.random.choice(
                    subpop_size, size=num_migrants, replace=False
                )
                emigrants = sub_pops[i][leaving].copy()
                sub_pops[i][leaving] = sub_pops[target][arriving]
                sub_pops[target][arriving] = emigrants
        return allele_freqs