-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRescaled_Csat.py
317 lines (271 loc) · 14 KB
/
Rescaled_Csat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
'''
Created on Aug 13, 2020
@author: mina
'''
#Import modules
import numpy as np
from scipy import stats
import matplotlib.colors as colors
import matplotlib as mpl
from data_set_example import seq_dic
#Define constants
FONT = 'Arial'
LINE_WIDTH = 1
TITLE_SIZE = 7
LEGEND_SIZE = 7
AXES_LABEL_SIZE = 7
AXES_TICK_SIZE = 7
POINT_LABEL_SIZE = 7
ANNOTATION_SIZE = 7
MARKER_SIZE = 4
MARKER_EDGE_WIDTH = 0.4
TICK_LENGTH = 2
TICK_WIDTH = 1
TICK_PAD = 1
X_AXIS_PAD = 1
Y_AXIS_PAD = 1
mpl.rcParams['axes.linewidth'] = LINE_WIDTH
class rescaler:
""" rescaler
Calculates rescaled csat values based on mean-field model
Arguments:
----------
x0: list, shape = [aro_r_term, k_term]
The initial lambda parameters for the mean-field model
var_list: list
The list of variants to parameterize the model on
temp_list: list
The list of temperatures at which there are csat values to analyze
penalty: float
The residual penalty associated with a negative rescaled csat value
When this occurs, "penalty" will be returned as the residual value for the fitting process
In general, this value should be relatively high so that the parameterization never causes negative csat values
var_list_full: list, optional
If included, will calculate final linear regression and plots using var_list_full
"""
def __init__(self, x0, var_list, temp_list, penalty, var_list_full = 0):
self.var_list = var_list
self.temp_list = temp_list
self.penalty = penalty
self.var_list_full = var_list_full
self.aro_aro = 1
self.aro_r = x0[0]
self.three_body_k = x0[1]
#These dictionaries and lists allow us to build rescaled csat values with different models for plotting later
self.ncpr_list = {}
self.ncpr_list_left = []
self.ncpr_list_right = []
self.ncpr_list_full = {}
self.ncpr_list_left_full = []
self.ncpr_list_right_full = []
#Raw csat lists
self.csat_list = {}
self.csat_list_left = []
self.csat_list_right = []
#csat lists rescaled by temperatures
self.csat_list_temp = {}
self.csat_list_left_temp = []
self.csat_list_right_temp = []
#csat lists rescaled using aro-aro and aro-arg interactions
self.csat_list_aro_r = {}
self.csat_list_aro_r_left = []
self.csat_list_aro_r_right = []
#csat lists rescaled using aro-aro, aro-arg, and destabilizing lys interactions
self.csat_list_aro_r_k = {}
self.csat_list_aro_r_k_left = []
self.csat_list_aro_r_k_right = []
#rescaled csat lists using var_list_full, if applicable
self.csat_list_aro_r_k_full = {}
self.csat_list_aro_r_k_left_full = []
self.csat_list_aro_r_k_right_full = []
#m_dic will holds the slopes of the dilute arms
self.m_dic = {}
#res_penalty is zero unless the lambda parameters result in a negative rescaled csat value
self.res_penalty = 0
for temp in self.temp_list:
self.ncpr_list[temp] = []
self.ncpr_list_full[temp] = []
self.csat_list[temp] = []
self.csat_list_temp[temp] = []
self.csat_list_aro_r[temp] = []
self.csat_list_aro_r_k[temp] = []
self.csat_list_aro_r_k_full[temp] = []
if self.var_list_full == 0:
self.var_list_used = self.var_list
else:
self.var_list_used = self.var_list_full
#Calculate a master m-value by averaging the slopes of the dilute arms
if len(self.temp_list) > 1:
self.m_value_full = 0
self.m_count_full = 0
for var in self.var_list:
temps = []
csats = []
for temp in self.temp_list:
csat = seq_dic[var]['csat'][temp]
if csat != 0:
temps.append(int(temp))
csats.append(np.log(csat))
# slope, intercept, r_value, p_value, std_err
fit = stats.linregress(csats, temps)
if len(temps) > 1:
self.m_dic[var] = fit[0]
if var in self.var_list:
self.m_value_full += fit[0]
self.m_count_full += 1
if self.m_value_full != 0:
self.m_value_full /= self.m_count_full
else:
self.m_value_full = 1
else:
self.m_value_full = 1
#The fit_list variables store the values for the final linear regressions of the V-shape
self.fit_list_x_full_left = []
self.fit_list_x_full_right = []
self.fit_list_y_full_left = []
self.fit_list_y_full_right = []
#Iterate through the variants and fill out the previously initialized lists
for var in self.var_list_used:
self.fit_list_left = []
self.fit_list_right = []
for temp in self.temp_list:
if seq_dic[var]['csat'][temp] != 0:
temp_norm = np.exp((int(temp) - 4) / self.m_value_full)
#temp_norm = np.exp((int(temp) - 4) / m_dic[var])
renorm_r_aro = 2 * self.aro_r * seq_dic[var]['aro'] * seq_dic[var]['r'] \
+ self.aro_aro * seq_dic[var]['aro'] ** 2
renorm_r_aro_k = (seq_dic[var]['aro'] ** 2) * (self.aro_aro - 3 * \
self.three_body_k * seq_dic[var]['k']) + (seq_dic[var]['aro'] * \
seq_dic[var]['r']) * (2 * self.aro_r - 6 * self.three_body_k * seq_dic[var]['k'])
renorm_r_aro /= temp_norm
renorm_r_aro_k /= temp_norm
if renorm_r_aro_k <= 0:
self.res_penalty = self.penalty
if var in self.var_list_used:
self.ncpr_list[temp].append(seq_dic[var]['ncpr'])
self.csat_list[temp].append(seq_dic[var]['csat'][temp])
self.csat_list_temp[temp].append(seq_dic[var]['csat'][temp] / temp_norm)
self.csat_list_aro_r[temp].append(renorm_r_aro * seq_dic[var]['csat'][temp])
self.csat_list_aro_r_k[temp].append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
self.ncpr_list_full[temp].append(seq_dic[var]['ncpr'])
self.csat_list_aro_r_k_full[temp].append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
#For initial purposes, variant +7R+12D was used as the mid-point of the V-shape
#This can easily be changed by manipulating the following line
midpoint = seq_dic['7r12d']['ncpr']
if seq_dic[var]['ncpr'] <= midpoint:
if var in self.var_list_used:
self.ncpr_list_left.append(seq_dic[var]['ncpr'])
self.csat_list_left.append(seq_dic[var]['csat'][temp])
self.csat_list_left_temp.append(seq_dic[var]['csat'][temp] / temp_norm)
self.csat_list_aro_r_left.append(renorm_r_aro * seq_dic[var]['csat'][temp])
self.csat_list_aro_r_k_left.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
self.ncpr_list_left_full.append(seq_dic[var]['ncpr'])
self.csat_list_aro_r_k_left_full.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
self.fit_list_left.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
if seq_dic[var]['ncpr'] >= midpoint:
if var in self.var_list_used:
self.ncpr_list_right.append(seq_dic[var]['ncpr'])
self.csat_list_right.append(seq_dic[var]['csat'][temp])
self.csat_list_right_temp.append(seq_dic[var]['csat'][temp] / temp_norm)
self.csat_list_aro_r_right.append(renorm_r_aro * seq_dic[var]['csat'][temp])
self.csat_list_aro_r_k_right.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
self.ncpr_list_right_full.append(seq_dic[var]['ncpr'])
self.csat_list_aro_r_k_right_full.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
self.fit_list_right.append(renorm_r_aro_k * seq_dic[var]['csat'][temp])
if seq_dic[var]['ncpr'] <= midpoint:
self.fit_list_left = np.mean(self.fit_list_left)
self.fit_list_y_full_left.append(self.fit_list_left)
self.fit_list_x_full_left.append(seq_dic[var]['ncpr'])
if seq_dic[var]['ncpr'] >= midpoint:
self.fit_list_right = np.mean(self.fit_list_right)
self.fit_list_y_full_right.append(self.fit_list_right)
self.fit_list_x_full_right.append(seq_dic[var]['ncpr'])
#Perform a linear regression on either side of the V-shape
self.slope_left, self.intercept_left, self.r_value_left, self.p_value_left, self.std_err_left = \
stats.linregress(self.fit_list_x_full_left, np.log(self.fit_list_y_full_left))
self.slope_right, self.intercept_right, self.r_value_right, self.p_value_right, self.std_err_right = \
stats.linregress(self.fit_list_x_full_right, np.log(self.fit_list_y_full_right))
#Calculate the total residual from the linear fits for the parameterization process
self.res = 0
for i, ncpr in enumerate(self.fit_list_x_full_left):
self.res += (np.log(self.fit_list_y_full_left[i]) - \
(self.intercept_left + self.slope_left * ncpr)) ** 2
for i, ncpr in enumerate(self.fit_list_x_full_right):
self.res += (np.log(self.fit_list_y_full_right[i]) - \
(self.intercept_right + self.slope_right * ncpr)) ** 2
def tick_param_setter(self, ax, direction = 'in'):
""" tick_param_setter
A helper function to improve plot appearances
Arguments:
----------
ax: matplotlib axes object
The axes object on which to plot
direction: either "in" or "out"
Determines whether the ticks are in or out of the panel frame
"""
ax.tick_params(labelsize = AXES_TICK_SIZE, direction = direction,
length = TICK_LENGTH, width = TICK_WIDTH, color = 'black',
labelcolor = 'black', pad = TICK_PAD)
ax.xaxis.labelpad = X_AXIS_PAD
ax.yaxis.labelpad = Y_AXIS_PAD
for tick in ax.xaxis.get_major_ticks():
tick.label1.set_fontsize(AXES_TICK_SIZE)
tick.label1.set_fontweight('bold')
tick.label1.set_fontname(FONT)
for tick in ax.yaxis.get_major_ticks():
tick.label1.set_fontsize(AXES_TICK_SIZE)
tick.label1.set_fontweight('bold')
tick.label1.set_fontname(FONT)
def ax_filler(self, ax, xlabel, ylabel, y_list, our_temp_list, full_check):
""" ax_filler
A helper function to plot rescaled csat values
Arguments:
----------
ax: matplotlib axes object
The axes object on which to plot
xlabel: string
The label of the x-axis
ylabel: string
The label of the y-axis
y_list: list
The rescaled csat list to plot
May be one of csat_list, csat_list_temp, csat_list_aro_r,
csat_list_aro_r_k, or csat_list_aro_r_k_full
our_temp_list: list
The temperature list to use for plotting
full_check: boolean
Determines whether to use var_list or var_list_full
"""
if full_check:
var_list_used = self.var_list_full
ncpr_list_used = self.ncpr_list_full
else:
var_list_used = self.var_list
ncpr_list_used = self.ncpr_list
ax.set_xlabel(xlabel, fontname = FONT, size = AXES_LABEL_SIZE, weight = 'bold')
ax.set_ylabel(ylabel, fontname = FONT, size = AXES_LABEL_SIZE, weight = 'bold')
ax.set(yscale="log")
ax.minorticks_off()
self.tick_param_setter(ax)
for temp in our_temp_list:
ml = ['o' if var in self.var_list else 'D' for var in var_list_used if seq_dic[var]['csat'][temp] != 0]
#opacity function varies the opacity of a marker based on the given temperature
def opacity(temp):
if len(our_temp_list) > 1:
float_temp_list = [float(temp) for temp in our_temp_list]
return 1 + (min(float_temp_list) - float(temp)) / (max(float_temp_list) - min(float_temp_list))
else:
return 1
color_list = [colors.to_rgba(seq_dic[var]['colorFig2'], opacity(temp)) for var in var_list_used if seq_dic[var]['csat'][temp] != 0]
for index, ncpr in enumerate(ncpr_list_used[temp]):
ax.errorbar(x = ncpr, y = y_list[temp][index], fmt = ml[index],
markersize = MARKER_SIZE, markeredgecolor = 'black',
markeredgewidth = MARKER_EDGE_WIDTH, color = color_list[index],
label=temp + '$^\circ$C')
#residual_returner retruns the residual calculated from above
#If any of the rescaled csat values were negative, this function returns "res_penalty"
def residual_returner(self):
if self.res_penalty > 0:
return self.res_penalty
else:
return self.res