-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_cr_peptides.py
260 lines (205 loc) · 9.14 KB
/
predict_cr_peptides.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# Sgourakis Lab
# Author: Sagar Gupta
# Date: April 28, 2023
# Email: [email protected]
# import required libraries
from collections import defaultdict
import csv
import math
import os
import platform
from pymol import cmd
import subprocess
from tqdm import tqdm
def get_pdb_dict(fastafile):
'''
Reads FASTA file
'''
fasta_file_handler = open(fastafile, "r")
counter = 1
raw_dict = {}
pdbid = ""
for line in fasta_file_handler:
if ">" in line:
line = line.rstrip().lstrip('>')
pdbid = line.split('|')[0]
allele = line.split('|')[2]
if counter % 2 == 0:
raw_dict[pdbid] = ""
counter += 1
else:
line = line.rstrip()
raw_dict[pdbid] = line+"_"+allele
fasta_file_handler.close()
pdb_dict = {}
for key, value in raw_dict.items():
if value != "":
pdb_dict[key] = value
return pdb_dict
def run_command(command, ignore=False):
'''
Helps run command line operations
'''
cmd = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output = cmd.communicate()
if cmd.returncode != 0:
if ignore:
return False
print("--- FAIL ---\n")
print(command)
if output[0] != None:
print(str(output[0], 'utf-8'))
if output[1] != None:
print(str(output[1], 'utf-8'))
print("\n--- FAIL ---")
return False
else:
if output[0] != None:
output = output[0].decode('utf-8')
return output
def distance(angle1, angle2):
'''
Determines the dihedral difference between two angles
Assumes inputted angles are in degrees
'''
distance = 2.0 * (1.0 - math.cos(math.radians(angle1) - math.radians(angle2)))
return distance
def make_resfile(directory, end_peptide_seq, structure_pepchain):
'''
Write resfile which mutates the template peptide sequence
'''
with open(f"{directory}/{end_peptide_seq}.resfile", "w") as resfile:
# preserve the input rotamer (do not pack at all) - applies to everything
resfile.write("NATRO\n")
resfile.write("start\n")
for pos, end_aa in enumerate(end_peptide_seq):
resfile.write(f"{pos+1} {structure_pepchain} PIKAA {end_aa}\n")
return f"{directory}/{end_peptide_seq}.resfile"
def main():
'''
THE LINE BELOW NEEDS TO BE CHANGED BASED ON THE USER
'''
ROSETTA_INSTALL_DIR = "/path_to_dir/rosetta/"
os_system = platform.system()
extension = ''
if os_system == 'Linux':
extension = 'linuxgccrelease'
elif os_system == 'Darwin':
extension = 'macosclangrelease'
'''
Crystal structure is WT A*24:02 (chain A) bound to PHOX2B (chain B) in complex with 10LH, heavy chain (chain H) and light chain (chain L)
'''
crystal_structure = f"PHOX2B_A2402_10LH.pdb"
# pHLA-I structural database taken from https://hla3db.research.chop.edu with the
# following query: peptide length 9, anchor class ∆7
# as of April 28th, 2023 there were 315 structures
# NOTE: The manuscript uses a cutoff date of April 29th, 2022
pdb_dict = get_pdb_dict("MHC_pdbs/MHCs.fasta")
'''
Replaces the PHOX2B peptide backbone in the A2402 and 10LH complex
'''
workdir1 = "template_graft/"
os.system(f"mkdir -p {workdir1}")
print(f"Grafting template peptide backbone via HLA alignment (n = {len(pdb_dict)}) (runtime: ~20 seconds)")
for pdbid in tqdm(pdb_dict.keys()):
cmd.load(crystal_structure, "10LH")
cmd.load(f"MHC_pdbs/{pdbid}_reordered.pdb", "template")
cmd.super("template and chain A and name ca", "10LH and chain A and name ca", cycles=0) # align HLA Ca atoms
cmd.remove("template and chain A") # removes template HLA
cmd.remove("10LH and chain B") # removes 10LH peptide (PHOX2B)
cmd.save(f"{workdir1}/{pdbid}_A2402_10LH.pdb")
cmd.delete("all")
'''
Replaces the new backbone sequence with PHOX2B peptide sequence and performs \extensive\ rotamer sampling
'''
print("Introducing hotspot residues and conducting extensive rotamer sampling (runtime: ~35 minutes)")
workdir2 = "template_graft_hotspot/"
os.system(f"mkdir -p {workdir2}")
for pdbid in tqdm(pdb_dict.keys()):
resfile_name = make_resfile(workdir2, 'AAAAARAAA', "B") # change the peptide sequence to 'alanine' PHOX2B peptide sequence
template_structure_new_bb = f"{workdir1}/{pdbid}_A2402_10LH.pdb"
# since R6 is not aromatic, we do not increase rotamer sampling for aromatic residues
os.system(f"{ROSETTA_INSTALL_DIR}/main/source/bin/fixbb.{extension} -database {ROSETTA_INSTALL_DIR}/main/database -s {template_structure_new_bb} -out:path:all {workdir2} -resfile {resfile_name} -suffix _PHOX2B -ex1 -ex2 -ex3 -ex4 -overwrite")
'''
Gets relevant distances/angles between R6 and D232 in the fixed backbone models
'''
print("Selecting template backbones with ideal target side chain geometry (runtime: ~15 seconds)")
output_dir = "output"
os.system(f"mkdir -p {output_dir}")
with open(f"{output_dir}/template_graft_hotspot_dist_angle.csv", "w") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['pdbid', 'R6-CZ_D104-DG', "R6-CZ_R6-NH1_D104-OD2_D104-CG", "R6-CZ_R6-NH1_D104-OD2_D104-CG"])
for pdbid in tqdm(pdb_dict.keys()):
fixbb_model = f"{workdir2}/{pdbid}_A2402_10LH_PHOX2B_0001.pdb"
cmd.load(fixbb_model, "struc")
cz_cg_distance = cmd.get_distance("struc//B/ARG`6/CZ", "struc//H/ASP`104/CG")
nh1_od2_dihedral_angle = cmd.get_dihedral("struc//B/ARG`6/CZ", "struc//B/ARG`6/NH1", "struc//H/ASP`104/OD2", "struc//H/ASP`104/CG")
nh2_od1_dihedral_angle = cmd.get_dihedral("struc//B/ARG`6/CZ", "struc//B/ARG`6/NH2", "struc//H/ASP`104/OD1", "struc//H/ASP`104/CG")
cmd.delete("all")
writer.writerow([pdbid, cz_cg_distance, nh1_od2_dihedral_angle, nh2_od1_dihedral_angle])
template_graft_hotspot_dist_angle = defaultdict(list)
with open(f"{output_dir}/template_graft_hotspot_dist_angle.csv", "r") as csvfile:
reader = csv.reader(csvfile)
next(reader)
for line in reader:
template_graft_hotspot_dist_angle[line[0]] = [float(x) for x in line[1:]]
'''
Finds fixbb models that fit the criteria for a R6/D232 interaction
Criteria:
1. Dihedral difference (D-score) between (CZ, NH1, and CG, OD1) + (CZ, NH2, and CG, OD2) < 3.5 and each individual D-score < 2.3
2. Distance between CG and CZ is < 5 Å
For reference, in 10LH:PHOX2B/A24, the distance between CG and CZ is 4.1 Å
'''
cmd.load(crystal_structure, "10LH")
REF_NH1_OD2 = cmd.get_dihedral("10LH//B/ARG`6/CZ", "/10LH//B/ARG`6/NH1", "10LH//H/ASP`104/OD2", "10LH//H/ASP`104/CG")
REF_NH2_OD1 = cmd.get_dihedral("10LH//B/ARG`6/CZ", "/10LH//B/ARG`6/NH2", "10LH//H/ASP`104/OD1", "10LH//H/ASP`104/CG")
cmd.delete("all")
valid_models = []
for pdbid, metrics in template_graft_hotspot_dist_angle.items():
cz_cg_distance = metrics[0]
nh1_od2_dihedral_angle = metrics[1]
nh2_od1_dihedral_angle = metrics[2]
if cz_cg_distance > 5:
continue
dscore1 = distance(nh1_od2_dihedral_angle, REF_NH1_OD2)
dscore2 = distance(nh2_od1_dihedral_angle, REF_NH2_OD1)
combined_dscore = dscore1 + dscore2
if combined_dscore < 4 and dscore1 < 2.3 and dscore2 < 2.3:
valid_models.append(pdbid)
print(f"There are {len(valid_models)} valid models")
with open(f"{output_dir}/valid_models.txt", "w") as txtfile:
for pdbid in valid_models:
txtfile.write(f"{pdbid}\n")
'''
Generate heteroclitic peptides with target hotspot residues
'''
new_pep_seq = {}
for pdbid in valid_models:
peptide = pdb_dict[pdbid].split("_")[0] # template peptide sequence
new_peptide = [""]*9
for index, resi in enumerate(peptide):
pos = index+1
# establish A*24 anchors
if pos == 2:
new_peptide[index] = 'Y'
elif pos == 9:
new_peptide[index] = 'F'
# establish PHOX2B hotspot residue
elif pos == 6:
new_peptide[index] = "R"
# reduce positive charge around R6
elif (pos == 7) and (resi == "R" or resi == "H" or resi == "K"):
new_peptide[index] = "G"
elif (pos == 4 or pos == 5) and (resi == "R" or resi == "H" or resi == "K"):
new_peptide[index] = "I"
else:
new_peptide[index] = resi
new_peptide = "".join(new_peptide)
new_pep_seq[pdbid] = new_peptide
unique_pep_seq = list(set(new_pep_seq.values()))
print(f"There are {len(unique_pep_seq)} unique peptide sequences")
with open(f"{output_dir}/potential_cr_pep_seq.txt", "w") as txtfile:
for pep_seq in unique_pep_seq:
txtfile.write(f"{pep_seq}\n")
if __name__ == "__main__":
main()