-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdb_functions.py
110 lines (91 loc) · 4.89 KB
/
pdb_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import score_functions as sf
def get_template_atoms(file_path, alignment_dataframe, template_name):
template_atoms = pd.read_csv(file_path, sep=" ", names=["Residue", "Position"])
template_atoms["Position"] = template_atoms["Position"].astype(int)
alignment_series = alignment_dataframe.loc[alignment_dataframe[template_name] != '-', template_name]
template_atoms = alignment_series.index[template_atoms["Position"].to_list()]
return template_atoms
def compile_shell_residues(target_name, target_path, template_atoms, alignment_dataframe):
temp = alignment_dataframe[target_name]
position = alignment_dataframe[target_name].iloc[template_atoms]
numeric_position = []
gap_count = 0
counter = 0
for index in range(len(position)):
if '-' in position.iloc[index]:
gap_count += 1
else:
numeric_position.append(int(position.iloc[index][1:]))
current_score_dataframe = sf.compile_score_data_frame(target_path, docking_flag=True)
interface_list = []
subscore_list = []
total_score_list = []
for filename in os.listdir(target_path):
if filename.endswith('.pdb'):
row_boolean = current_score_dataframe['description'] == filename[:-4]
row_index = [i for i, value in enumerate(row_boolean) if value]
interface_score = current_score_dataframe['total_interface_energy'].iloc[row_index[0]]
total_score = current_score_dataframe['total_score'].iloc[row_index[0]]
interface_list.append(interface_score)
total_score_list.append(total_score_list)
# need to make an array for calculating the average interface_score
pdb_file = open(target_path + '/' + filename)
pdb = pdb_file.read()
pdb_file.close()
if '.gz' in pdb:
gz = '.gz'
else:
gz = ''
pdb = pdb.split("\n")
search_string_beginning = "#BEGIN_POSE_ENERGIES_TABLE " + filename + gz
search_string_ending = "#END_POSE_ENERGIES_TABLE " + filename + gz
start_index = pdb.index(search_string_beginning)
end_index = pdb.index(search_string_ending)
new_list = []
for index in range(start_index + 2, end_index):
new_list.append(pdb[index].split(" "))
dataframe = pd.DataFrame(new_list, columns=pdb[start_index + 1].split(' '))
subtotal_score = 0
if counter == 0: # bandaid fix
numeric_position = numeric_position + [dataframe.shape[0] - 1]
counter += 1
else:
pass
for index in numeric_position:
try:
subtotal_score += float(dataframe["total"].iloc[index])
except:
print(target_name + "'s structure is probably truncated.")
subscore_list.append(subtotal_score)
position = pd.concat([position, dataframe["label"].tail(1)], ignore_index=True)
# need to make an array for calculating the average of subtotal_score
if 'contribution_dataframe' in locals():
contribution_dataframe = pd.concat([contribution_dataframe, dataframe.iloc[numeric_position, [0, -1]]],
ignore_index=True)
else:
contribution_dataframe = dataframe.iloc[numeric_position, [0, -1]]
return [subscore_list], [interface_list], [position], gap_count, [total_score_list], contribution_dataframe
def residue_contribution(gene, filename_tag, check_dataframe, current_path):
check_dataframe.sort_values(by="label")
new_dataframe = pd.DataFrame(columns=["residue", "contribution"])
residue_means = []
for residue in check_dataframe.label.unique():
current_list = check_dataframe.loc[check_dataframe["label"] == residue, "total"].to_list()
current_subdataframe = pd.DataFrame(data={"residue": residue, "contribution": [np.float_(current_list)]})
new_dataframe = pd.concat([new_dataframe, current_subdataframe], ignore_index=True)
residue_means.append(np.mean(np.float_(current_list)))
index_sort_by_means = np.argsort(residue_means)[::-1]
figure = plt.figure(figsize=(8, 8))
axs = plt.gca()
box_plot = axs.boxplot(new_dataframe.contribution[index_sort_by_means],
vert=False, # vertical box alignment
patch_artist=True, # fill with color
labels=new_dataframe.residue[index_sort_by_means]) # will be used to label x-ticks
axs.set_title("Shell score residue breakdown for " + gene)
axs.set_xlabel("Weighted total score for individual residue")
plt.close(figure)
figure.savefig(current_path + "residue_contribution_" + gene + filename_tag + ".png")