-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes to BCRPhylo to allow for mapping BCR sequence to affinity and expression #7
base: main
Are you sure you want to change the base?
Changes from all commits
93bcb63
f016446
6cfbd03
102281e
ec4f111
7c62726
db930de
01dd8dc
32f8fd3
e9142d3
b89032c
35019c2
57fffba
af57d30
5f69889
ee41693
ad1cd72
230aefb
6700908
28a8cd9
f7d66ed
41c0846
0892087
b1243c2
60b3b33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -92,16 +92,16 @@ def init_blosum(): | |
blinfo = {aa1 : {aa2 : None for aa2 in all_amino_acids} for aa1 in all_amino_acids} | ||
init_blosum() | ||
sdists = { # config info necessary for rescaling the two versions of aa similarity distance | ||
'ascii' : { | ||
'scale_min' : 0., # you want the mean to be kinda sorta around 1, so the --target_dist ends up being comparable | ||
'scale_max' : 4.7, | ||
}, | ||
'blosum' : { | ||
'scale_min' : 0., | ||
# 'scale_max' : 3.95, # use this value if you go back to taking the exp() (note that now the exp() would need to be added in two places) | ||
'scale_max' : 1.55, | ||
}, | ||
} | ||
'ascii' : { | ||
'scale_min' : 0., # you want the mean to be kinda sorta around 1, so the --target_dist ends up being comparable | ||
'scale_max' : 4.7, | ||
}, | ||
'blosum' : { | ||
'scale_min' : 0., | ||
# 'scale_max' : 3.95, # use this value if you go back to taking the exp() (note that now the exp() would need to be added in two places) | ||
'scale_max' : 1.55, | ||
}, | ||
} | ||
for sdtype, sdinfo in sdists.items(): | ||
if sdtype == 'ascii': | ||
dfcn = aa_ascii_code_distance | ||
|
@@ -134,9 +134,9 @@ def plot_sdists(): | |
print(' raw rescaled raw rescaled') | ||
for aa2 in all_amino_acids: | ||
print(' %s %5.2f %5.2f %5.1f %5.2f' % (aa2, | ||
aa_inverse_similarity(aa1, aa2, 'blosum', dont_rescale=True), aa_inverse_similarity(aa1, aa2, 'blosum'), | ||
aa_inverse_similarity(aa1, aa2, 'ascii', dont_rescale=True), aa_inverse_similarity(aa1, aa2, 'ascii'))) | ||
import plotutils | ||
aa_inverse_similarity(aa1, aa2, 'blosum', dont_rescale=True), aa_inverse_similarity(aa1, aa2, 'blosum'), | ||
aa_inverse_similarity(aa1, aa2, 'ascii', dont_rescale=True), aa_inverse_similarity(aa1, aa2, 'ascii'))) | ||
import plotutils | ||
for sdtype in ['ascii', 'blosum']: | ||
print(sdtype) | ||
all_rescaled_vals = [aa_inverse_similarity(aa1, aa2, sdtype=sdtype) for aa1, aa2 in itertools.combinations_with_replacement(all_amino_acids, 2)] | ||
|
@@ -164,55 +164,74 @@ def target_distance_fcn(args, this_seq, target_seqs): | |
def calc_kd(node, args): | ||
if has_stop_aa(node.aa_seq): # nonsense sequences have zero affinity/infinite kd | ||
return float('inf') | ||
if not args.selection: | ||
return 1. | ||
|
||
assert args.mature_kd < args.naive_kd | ||
tdist = node.target_distance if args.min_target_distance is None else max(node.target_distance, args.min_target_distance) | ||
kd = args.mature_kd + (args.naive_kd - args.mature_kd) * (tdist / float(args.target_distance))**args.k_exp # transformation from distance to kd | ||
|
||
elif args.function_target_distance: | ||
kd = args.mature_kd + (args.naive_kd - args.mature_kd) * (tdist / float(args.target_distance))**args.k_exp # transformation from distance to kd | ||
elif args.torchdms: | ||
kd = 100 - 3*min(node.aa_seq.count('A'),33) ### arbitrary sequence to function mapping until we implement the torchdms api | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should wait to commit the torchdms stuff until it's implemented -- better to limit matsengrp/master to finished things. |
||
else: ## if no tags are specified, implement a neutral simulation | ||
kd = args.mature_kd | ||
return kd | ||
|
||
def calc_B_exp(node, args):
    """Return the BCR (B cell receptor) expression level for <node>.

    Nonsense sequences (containing a stop codon) produce no receptor. Under
    --torchdms an arbitrary placeholder sequence-to-expression mapping is used
    until the torchdms api is implemented (NOTE(review): reviewer asked to hold
    this off matsengrp/master until implemented). Otherwise (neutral simulation
    or the target-distance function simulation) a fixed amount args.B_total
    (default 10000) of BCR is used.
    """
    if has_stop_aa(node.aa_seq):  # nonsense sequences have no production
        return 0
    if args.torchdms:
        # arbitrary sequence-to-function mapping until we implement the torchdms api
        return args.B_total + 500 * min(node.aa_seq.count('A'), 10)
    return args.B_total
|
||
# ---------------------------------------------------------------------------------------- | ||
def update_lambda_values(live_leaves, A_total, B_total, logi_params, selection_strength, lambda_min=10e-10): | ||
''' update the lambda_ feature (parameter for the poisson progeny distribution) for each leaf in <live_leaves> ''' | ||
|
||
# ---------------------------------------------------------------------------------------- | ||
def calc_BnA(Kd_n, A, B_total): | ||
# Since each B cell now has its own BCR expression level, B_total must be
# calculated from the per-cell expression values rather than taken as a constant.
def sum_of_B(B_n):
    """Return the total BCR expression summed over the population <B_n>."""
    # note: do not shadow the module-level notion of B_total with a local of the
    # same name -- just return the sum directly
    return numpy.sum(B_n)
|
||
def calc_BnA(Kd_n, A, B_n):
    '''
    This calculates the fraction B:A (B bound to A), at equilibrium also referred to as "binding time",
    of all the different Bs in the population given the number of free As in solution.
    Each cell contributes its own expression level B_n (elementwise over numpy arrays).
    '''
    BnA = B_n / (1 + Kd_n / A)
    return BnA
|
||
# ---------------------------------------------------------------------------------------- | ||
def return_objective_A(Kd_n, A_total, B_n):
    '''
    The objective function that solves the set of differential equations setup to find the number of free As,
    at equilibrium, given a number of Bs with some affinity listed in Kd_n.
    Returns a callable of the free-antigen amount A; its minimum (zero residual)
    is the equilibrium free-antigen value.
    '''
    def objective(A):
        bound = numpy.sum(B_n / (1 + Kd_n / A))  # total B bound to A at this free-A level
        return (A_total - (A + bound))**2        # squared conservation-of-A residual
    return objective
# ---------------------------------------------------------------------------------------- | ||
def calc_binding_time(Kd_n, A_total, B_total): | ||
def calc_binding_time(Kd_n, A_total, B_n): | ||
''' | ||
Solves the objective function to find the number of free As and then uses this, | ||
to calculate the fraction B:A (B bound to A) for all the different Bs. | ||
''' | ||
obj = return_objective_A(Kd_n, A_total, B_total) | ||
obj = return_objective_A(Kd_n, A_total, B_n) | ||
# Different minimizers have been tested and 'L-BFGS-B' was significant faster than anything else: | ||
obj_min = minimize(obj, A_total, bounds=[[1e-10, A_total]], method='L-BFGS-B', tol=1e-20) | ||
BnA = calc_BnA(Kd_n, obj_min.x[0], B_total) | ||
BnA = calc_BnA(Kd_n, obj_min.x[0], B_n) | ||
#print(Kd_n) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's best not to commit commented lines, just to keep things clean. |
||
#print(obj_min.x[0]) | ||
#print(B_n) | ||
# Terminate if the precision is not good enough: | ||
assert(BnA.sum()+obj_min.x[0]-A_total < A_total/100) | ||
assert(BnA.sum()+obj_min.x[0]-A_total < A_total/100) | ||
return BnA | ||
|
||
# ---------------------------------------------------------------------------------------- | ||
def trans_BA(BnA):
    '''Transform the fraction B:A (B bound to A) to a poisson lambda between 0 and 2.'''
    # We keep alpha to enable the possibility that there is a minimum lambda_:
    power_val = -beta * BnA
    # Clamp very negative exponents: Q*exp(power_val) is effectively 0 below this
    # anyway, and clamping avoids numpy underflow warnings. The -125 cutoff is
    # arbitrary -- any value low enough to make exp() negligible works the same.
    power_val[power_val < -125] = -125
    lambda_ = alpha + (2 - alpha) / (1 + Q * numpy.exp(power_val))
    return [max(lambda_min, l) for l in lambda_]
|
||
# ---------------------------------------------------------------------------------------- | ||
|
@@ -240,8 +259,9 @@ def getvar(lvals): | |
|
||
# ---------------------------------------------------------------------------------------- | ||
alpha, beta, Q = logi_params | ||
Kd_n = scipy.array([l.Kd for l in live_leaves]) | ||
BnA = calc_binding_time(Kd_n, A_total, B_total) # get list of binding time values for each cell | ||
Kd_n = numpy.array([l.Kd for l in live_leaves]) | ||
B_n = numpy.array([l.B_exp for l in live_leaves]) | ||
BnA = calc_binding_time(Kd_n, A_total, B_n) # get list of binding time values for each cell | ||
new_lambdas = trans_BA(BnA) # convert binding time list to list of poisson lambdas for each cell (which determine number of offspring) | ||
if selection_strength < 1: | ||
new_lambdas = apply_selection_strength_scaling(new_lambdas) | ||
|
@@ -260,7 +280,7 @@ def A_obj(carry_cap, B_total, f_full, Kd_n, U): | |
def obj(A): return((carry_cap - C_A(A, A_total_fun(A, B_total, Kd_n), f_full, U))**2) | ||
return obj | ||
|
||
Kd_n = scipy.array([mature_kd] * carry_cap) | ||
Kd_n = numpy.array([mature_kd] * carry_cap) | ||
obj = A_obj(carry_cap, B_total, f_full, Kd_n, U) | ||
# Some funny "zero encountered in true_divide" errors are not affecting results so ignore them: | ||
old_settings = scipy.seterr(all='ignore') # Keep old settings | ||
|
@@ -360,6 +380,80 @@ def make_bounds(tdist_hists): # tdist_hists: list (over generations) of scipy.h | |
plt.title('population over time of cells grouped by min. distance to target') | ||
fig.savefig(outbase + '.selection_sim.runstats.pdf') | ||
|
||
def make_list(array):
    """Convert an iterable of numpy array rows into a list of plain Python lists."""
    return [row.tolist() for row in array]
|
||
def make_equal_lengths(a):
    """Convert the rows of <a> (numpy arrays, possibly ragged) to plain lists,
    right-padding each with None so every row has the length of the longest one.

    Returns [] for empty input (the old version raised on max() of an empty list).
    """
    if len(a) == 0:
        return []
    max_length = max(len(row) for row in a)
    padded = [row.tolist() for row in a]
    for row in padded:
        row.extend([None] * (max_length - len(row)))
    return padded
|
||
def sort_it_out(bcell_index):
    """Return a copy of <bcell_index> with each row sorted ascending.

    None entries (padding from make_equal_lengths) sort to the end by treating
    them as +inf for comparison purposes; the None values themselves are kept.
    """
    def sort_key(val):
        return float('inf') if val is None else val
    return [sorted(row, key=sort_key) for row in bcell_index]
|
||
def none_removed(index_sorted):
    """Return a copy of <index_sorted> with unusable entries dropped from each row.

    Drops inf values (e.g. the infinite kd of nonsense sequences) and None
    padding. The old version called math.isinf() directly, which raised
    TypeError on the None entries that sort_it_out deliberately passes through.
    """
    cleaned = []
    for row in index_sorted:
        cleaned.append([v for v in row if v is not None and not math.isinf(v)])
    return cleaned
|
||
def plot_runstats2(scatter_value, scatter_index, desired_name):
    """Scatter plot of BCR values vs GC generation.

    <scatter_index>: per-generation x values (generation indices), ragged rows.
    <scatter_value>: matching per-generation BCR values.
    Rows are padded to equal length (with None) so matplotlib can pair them up.
    Writes 'GCsim<desired_name>.sim_scatter.pdf' to the current directory.
    """
    fig = plt.figure()
    ax = plt.subplot(111)
    x_index = make_equal_lengths(scatter_index)
    y_index = make_equal_lengths(scatter_value)
    ax.scatter(x_index, y_index)
    plt.ylabel(desired_name)
    plt.xlabel('GC generation')
    plt.title('distribution of BCR value at each generation')
    fig.savefig('GCsim' + desired_name + '.sim_scatter.pdf')
|
||
def plot_runstats3(bcr_value, scatter_index, desired_name):
    """Violin plot of the BCR value distribution at every 5th GC generation.

    <bcr_value>: per-generation rows of BCR values (may contain inf/None, which
    are removed before plotting). <scatter_index> is unused beyond its length
    in the old code; we index off the cleaned data itself to be safe.
    Writes 'GCsim<desired_name>.sim_violin.pdf' to the current directory.
    """
    numpy.seterr(under='ignore')  # silence underflow warnings from plotting internals
    fig = plt.figure()
    ax = plt.subplot(111)
    cleaned = none_removed(sort_it_out(bcr_value))
    # take generations 5, 10, 15, ...; bounding by len(cleaned) fixes the
    # IndexError the old 5*i+5 loop hit when the generation count was a multiple of 5
    y_index = [cleaned[j] for j in range(5, len(cleaned), 5)]
    ax.violinplot(y_index)
    plt.ylabel(desired_name)
    plt.xlabel('Every 5th GC generation')
    plt.title('distribution of BCR value at each generation')
    fig.savefig('GCsim' + desired_name + '.sim_violin.pdf')
|
||
# ---------------------------------------------------------------------------------------- | ||
# bash color codes | ||
Colors = {} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I'm understanding correctly this will change the default behavior (from using this function to straight neutral). Unless I'm mistaken this should change -- it's extremely important to avoid changing default behavior if at all possible (i.e. if someone runs with the same command line options they should get the same results).