-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_count_contacts.py
executable file
·141 lines (104 loc) · 4.95 KB
/
run_count_contacts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/python
__author__ = "morganlnance"
###########################
#### PROGRAM ARGUMENTS ####
###########################
import argparse
parser = argparse.ArgumentParser(description="Use Python to count contacts.")
parser.add_argument("pickle_directory", type=str, help="give me the path to the protein and ligand pickle directory.")
parser.add_argument("cutoff", type=int, default=5, help="how big do you want the activesite cutoff to be, in angstroms? default = 5")
parser.add_argument("heavy_atoms", type=int, default=10, help="how many heavy atoms does a HETATM residue need to be considered a ligand? default = 10")
input_args = parser.parse_args()
#################
#### IMPORTS ####
#################
import os
import sys
try:
from colorama import Fore, Style
except:
pass
try:
import pandas as pd
except ImportError:
print "Trouble with imports - do you have pandas? Exiting"
sys.exit()
import non_rosetta_count_contacts as contact
######################
#### MAIN PROGRAM ####
######################
def go( pickle_dir, cutoff, heavy_atoms ):
# get the current working directory
working_dir = os.getcwd()
# check the integrity of the passed pickle_dir
if not os.path.isdir( pickle_dir ):
print "You passed me a nonexistent pickle directory. Please check your path:", pickle_dir
print "Exiting."
sys.exit()
# add a tailing '/' to the pickle_dir path if needed
if not pickle_dir.endswith( '/' ):
pickle_dir += '/'
# collect the PDB names from the protein and ligand pickle files
os.chdir( pickle_dir )
all_pdb_names = []
for pickle_file in os.listdir( os.getcwd() ):
pdb_name = pickle_file[:4]
if pdb_name not in all_pdb_names:
all_pdb_names.append( pdb_name )
os.chdir( working_dir )
# instantiate an instance of the contact counting code to instantiate class-specific data holders
ctct = contact.CTCT()
# for each PDB name collected, run the contact counting code using the pro and lig pickles
for pdb in all_pdb_names:
# inform the user of the current PDB being worked on
try:
# try to print in color if user has colorama library
text = "Working on %s" %pdb
print(Fore.RED + text + Style.RESET_ALL)
except:
print "Working on", pdb
# store the pickle file paths given the passed information
pro_pickle_path = pickle_dir + pdb + "_pro.p"
lig_pickle_path = pickle_dir + pdb + "_lig.p"
# read in the protein and ligand files for this PDB
ctct.read_pro_lig_pickles( pro_pickle_path, lig_pickle_path )
# get protein atoms in the activesite around the ligand
ctct.get_activesite( cutoff )
# count contacts
ctct.count_contacts( cutoff )
print "\n\n\n"
###########################
##### DATA COLLECTION #####
###########################
# generic filename prefix
filename_prefix = "program_input_"
# contact counting data
CC_df = pd.DataFrame()
CC_df["pdb_names"] = ctct.CC_pdb_names
CC_df["num_lig_atoms"] = ctct.CC_lig_atms
CC_df["num_activesite_atms"] = ctct.CC_activesite_atms
CC_df["num_pp_contacts_within_cutoff"] = ctct.CC_pp_contacts
CC_df["num_pn_contacts_within_cutoff"] = ctct.CC_pn_contacts
CC_df["num_np_contacts_within_cutoff"] = ctct.CC_np_contacts
CC_df["num_nn_contacts_within_cutoff"] = ctct.CC_nn_contacts
CC_df["num_unk_contacts"] = ctct.CC_unk_contacts
print CC_df
print "\n\n\n\n"
CC_df.to_csv( str( working_dir ) + '/' + filename_prefix + "contact_counts_" + str( cutoff ) + "_Ang_cutoff_and_" + str( heavy_atoms ) + "_heavy_atom_ligand.csv", index = 0, index_col = 0 )
# make data lists to add over course of program for contact counts per lig - will be added to pandas df at end
CC_per_lig_df = pd.DataFrame()
CC_per_lig_df["pdb_names"] = ctct.CC_per_lig_pdb_names
CC_per_lig_df["lig_names"] = ctct.CC_per_lig_lig_names
CC_per_lig_df["num_lig_atms"] = ctct.CC_per_lig_lig_atms
CC_per_lig_df["num_activesite_atms"] = ctct.CC_per_lig_activesite_atms
CC_per_lig_df["pp_contacts_within_cutoff"] = ctct.CC_per_lig_pp_contacts
CC_per_lig_df["pn_contacts_within_cutoff"] = ctct.CC_per_lig_pn_contacts
CC_per_lig_df["np_contacts_within_cutoff"] = ctct.CC_per_lig_np_contacts
CC_per_lig_df["nn_contacts_within_cutoff"] = ctct.CC_per_lig_nn_contacts
CC_per_lig_df["num_unk_contacts"] = ctct.CC_per_lig_unk_contacts
print CC_per_lig_df
CC_per_lig_df.to_csv( str( working_dir ) + '/' + filename_prefix + "contact_counts_per_lig_res_" + str( cutoff ) + "_Ang_cutoff_and_" + str( heavy_atoms ) + "_heavy_atom_ligand.csv", index = 0, index_col = 0 )
######################
#### RUNS PROGRAM ####
######################
go( input_args.pickle_directory, input_args.cutoff, input_args.heavy_atoms )