-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
204 lines (165 loc) · 9.56 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/python
__author__ = 'morganlnance'

# global variables
# one-letter and three-letter codes of the 20 canonical amino acids
# the two lists are kept in the same order so that position i in one list
# names the same amino acid as position i in the other
AA_name1_list = [
    'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
    'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y',
]
AA_name3_list = [
    "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
    "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR",
]
# lookup tables in both directions, built by pairing the two lists above
AA_name1_to_name3 = dict(zip(AA_name1_list, AA_name3_list))
AA_name3_to_name1 = dict(zip(AA_name3_list, AA_name1_list))
def mutate_residue(pose_num, new_res_name, input_pose, sf, pdb_num=False, pdb_chain=None):
    """
    Mutate residue at position <pose_num> to <new_res_name>
    <new_res_name> can be a single-letter or three-letter residue code (case-insensitive)
    If you are giving a pdb number, set <pdb_num> to True AND give me a <pdb_chain> letter id
    The <input_pose> is never modified; all work happens on a clone of it
    On any invalid argument a message is printed and the unmodified clone is returned
    :param pose_num: int( Pose number for residue, or PDB number if <pdb_num> is True )
    :param new_res_name: str( one- or three-letter code for the new amino acid. Example 'A' or "THR" )
    :param input_pose: Pose
    :param sf: ScoreFunction ( kept in the signature for callers; not used by this function body )
    :param pdb_num: bool( did you give me a PDB number instead? Set to True if so. Give me a <pdb_chain> too then ) Default = False (Pose number)
    :param pdb_chain: str( PDB chain id such as 'A' or 'X'. Must have set <pdb_num> to True as well )
    :return: mutated Pose ( or an unmodified clone of <input_pose> if the arguments were invalid )
    """
    # imports
    from pyrosetta import pose_from_sequence
    from rosetta.core.conformation import ResidueFactory

    # copy over the input pose
    pose = input_pose.clone()

    # a PDB number cannot be resolved without its chain id
    # (truthiness is used here so this check agrees with the <pdb_num> branch below)
    if pdb_num and pdb_chain is None:
        print("\nYou told me you gave me a PDB number, but you did not give me a PDB chain id. "
              "Set <pdb_chain> to the appropriate chain id. Returning the original pose.")
        return pose

    # ensure the <new_res_name> is a valid one- or three-letter residue code
    code_length = len(new_res_name)
    if code_length not in (1, 3):
        print("\nYou did not give me a single- or three-letter amino acid code. "
              "'%s' did not work. Returning the original pose." % new_res_name)
        return pose

    # reduce whichever code was given to the single-letter code used to build the new residue
    upper_res_name = new_res_name.upper()
    if code_length == 1:
        if upper_res_name not in AA_name1_list:
            print("\nIt appears that '%s' is not a valid single-letter amino acid code. "
                  "Returning the original pose." % new_res_name)
            return pose
        single_new_res_name = upper_res_name
    else:
        if upper_res_name not in AA_name3_list:
            print("\nIt appears that '%s' is not a valid three-letter amino acid code. "
                  "Returning the original pose." % new_res_name)
            return pose
        single_new_res_name = AA_name3_to_name1[upper_res_name]

    # ensure <pose_num> (and <pdb_chain>) exists in the pose
    if not pdb_num:
        if not 1 <= pose_num <= pose.size():
            print("\nYou appear to have given me an invalid Pose residue number. "
                  "Ensure residue number %s exists in your Pose. Returning the original pose." % pose_num)
            return pose
    else:
        # keep the PDB number the user gave so the error message can report it;
        # <pose_num> is about to be replaced by the translated Pose number
        pdb_res_num = pose_num
        pose_num = pose.pdb_info().pdb2pose(pdb_chain, pdb_res_num)
        # a result of 0 is treated as "no such residue"
        if pose_num == 0:
            print("\nYour PDB number and chain ( %s chain %s ) don't seem to exist in the pose. "
                  "Check your input. Returning the original pose." % (pdb_res_num, pdb_chain))
            return pose

    # move on to the mutation
    # instantiate a ResidueFactory
    res_factory = ResidueFactory()

    # create a three-mer of the desired amino acid
    # want a three-mer because it's easier to deal with a new amino acid that does not have a special end VariantType
    threemer = pose_from_sequence(single_new_res_name * 3)

    # get the ResidueType from the middle residue of the threemer
    res_type = threemer.conformation().residue_type(2)

    # build the new residue from the current residue and conformation
    # NOTE: preserve_c_beta is passed as False, so keeping the original CB is not requested here
    new_residue = res_factory.create_residue(res_type,
                                             current_rsd=pose.residue(pose_num),
                                             conformation=pose.conformation(),
                                             preserve_c_beta=False)

    # replace the old residue in the pose
    pose.replace_residue(pose_num, new_residue, orient_backbone=True)

    return pose
def get_sum_hbond_E(sf, pose):
    """
    Add up the four hydrogen-bond energy terms of <pose> as scored by <sf>
    Terms summed: hbond_sr_bb, hbond_lr_bb, hbond_bb_sc, hbond_sc
    :param sf: ScoreFunction
    :param pose: Pose
    :return: float( sum of hbond energies )
    """
    # imports
    from rosetta.core.scoring import score_type_from_name

    # accumulate each hbond term in a fixed order
    total_hbond_E = 0
    for term_name in ("hbond_sr_bb", "hbond_lr_bb", "hbond_bb_sc", "hbond_sc"):
        term_type = score_type_from_name(term_name)
        total_hbond_E = total_hbond_E + sf.score_by_scoretype(pose, term_type)

    # NOTE: an earlier, disabled variant read these terms from
    # pose.energies().total_energies() instead of re-scoring with <sf>
    return total_hbond_E
def show_score_breakdown(sf, pose):
    """
    Print the score of <pose> for every nonzero weighted ScoreType in <sf>, one "name: value" line each
    Values are rounded to 3 decimal places and a blank line is printed after the breakdown
    :param sf: ScoreFunction
    :param pose: Pose
    """
    # collect every line first so nothing is printed unless all terms were scored
    breakdown_lines = []
    for score_type in sf.get_nonzero_weighted_scoretypes():
        term_score = round(sf.score_by_scoretype(pose, score_type), 3)
        breakdown_lines.append("%s: %s" % (score_type, term_score))

    # print out each score, followed by an empty line
    print("\n".join(breakdown_lines))
    print("")
def get_res_nums_within_radius(res_num_in, input_pose, radius, include_res_num=False):
    """
    Use the nbr_atom_xyz to find residue numbers within <radius> of <res_num_in> in <input_pose>
    The nbr_atom seems to be C4 on carbohydrates
    The <input_pose> is only read from, so it is not cloned
    :param res_num_in: int( Pose residue number )
    :param input_pose: Pose
    :param radius: int or float( radius around <res_num_in> to use to select residues )
    :param include_res_num: bool( do you want to include <res_num_in> in the return list? ) Default = False
    :return: list( Pose residue numbers, in ascending order, whose nbr_atom is within <radius> of that of <res_num_in> )
    """
    # nbr_xyz of the residue of interest
    query_xyz = input_pose.residue(res_num_in).nbr_atom_xyz()

    # container for residues inside the <radius>
    res_nums_in_radius = []
    for res_num in range(1, input_pose.size() + 1):
        # skip the residue of interest up front when it is not wanted; this avoids
        # removing it afterwards, which fails if it never made it into the list
        # (e.g. when <radius> is negative)
        if res_num == res_num_in and not include_res_num:
            continue

        # keep the residue number if its nbr_atom is no further than <radius> away
        center = input_pose.residue(res_num).nbr_atom_xyz()
        if center.distance(query_xyz) <= radius:
            res_nums_in_radius.append(res_num)

    return res_nums_in_radius
def get_res_nums_within_radius_of_residue_list(residues, input_pose, radius, include_res_nums=False):
    """
    Find all residue numbers around the list of <residues> given in <input_pose> within <radius> Angstroms.
    Set <include_res_nums> if you want to include the list of passed <residues> in the return list of residue numbers.
    Uses the nbr_atom to calculate distance. The nbr_atom seems to be C4 on carbohydrates
    :param residues: list( Pose residue numbers )
    :param input_pose: Pose
    :param radius: int() or float( radius in Angstroms )
    :param include_res_nums: bool( do you want to include the passed <residues> in the return list of residue numbers? ) Default = False
    :return: sorted list( unique residue numbers around passed <residues> list within <radius> Angstroms ), or None if <residues> is not a list
    """
    # argument check: ensure passed <residues> argument is a list (list subclasses are accepted)
    if not isinstance(residues, list):
        print("\nArgument error. You're supposed to past me a list of residue numbers for the <residues> argument. Returning None.")
        return None

    # use get_res_nums_within_radius to collect the neighbors of each residue;
    # a set drops the duplicates that appear when neighborhoods overlap
    residues_within_radius = set()
    for res_num in residues:
        residues_within_radius.update(
            get_res_nums_within_radius(res_num, input_pose, radius, include_res_num=include_res_nums))

    # a passed residue can still be present as the neighbor of another passed residue,
    # so drop all of them at once if they are not wanted
    if not include_res_nums:
        residues_within_radius.difference_update(residues)

    # hand back an ascending list
    return sorted(residues_within_radius)