-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathpreprocess_pdb.sh
executable file
·152 lines (126 loc) · 3.98 KB
/
preprocess_pdb.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/bin/bash
#
# Usage:
#   preprocess_pdb.sh -o|--outdir DIR [-l|--ligand LIGAND] [-s|--sdf FILE]
#                     [-m|--mol2 FILE] PDBFILE PDBID_CHAINID
#
# Options may appear before, between or after the two positional arguments.
# Any other argument starting with "-" is rejected with exit status 1.

# clear environment variables
# (values inherited from the caller's environment must not leak into this run)
unset OUTDIR LIGAND SDFFILE POSITIONAL_ARGS PDBFILE NAME_CHAIN MOL2FILE

### USER INPUT #################################################################################################
while [[ $# -gt 0 ]]; do
  case "$1" in
    -o|--outdir)
      OUTDIR=$2
      # Two single shifts instead of `shift 2`: `shift 2` does nothing when the
      # value is missing, which would make this loop spin forever.
      shift # past argument
      shift # past value
      ;;
    -l|--ligand)
      LIGAND="$2"
      shift # past argument
      shift # past value
      ;;
    -s|--sdf)
      # Quoted so that a path containing whitespace stays a single operand.
      SDFFILE="$(realpath "$2")"
      shift # past argument
      shift # past value
      ;;
    -m|--mol2)
      MOL2FILE="$(realpath "$2")"
      shift # past argument
      shift # past value
      ;;
    -*)
      # Also covers "--something": every long option starts with "-".
      echo "Unknown option $1"
      exit 1
      ;;
    *)
      POSITIONAL_ARGS+=("$1") # save positional arg
      shift # past argument
      ;;
  esac
done
# Restore the collected positional arguments as $1 (PDB file) and
# $2 (protein definition), then verify that the required inputs are present.
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters

# Resolve the PDB path only when one was given: calling realpath without an
# operand just prints a confusing "missing operand" message. If realpath
# fails, PDBFILE stays empty and the check below reports it.
PDBFILE=""
if [[ -n "$1" ]]; then
  PDBFILE="$(realpath "$1")"
fi
NAME_CHAIN=$2

# All tests quote their operand so values containing whitespace are handled.
if [[ -z "$PDBFILE" ]]; then
  echo "[ERROR] Please provide an input PDB file"
  exit 1
fi

if [[ -z "$NAME_CHAIN" ]]; then
  echo "[ERROR] Please provide the protein definition (as <PDBID_CHAINID>)"
  exit 1
fi

if [[ -z "$OUTDIR" ]]; then
  echo "[ERROR] Please provide an output directory"
  exit 1
fi

# The ligand is optional; its absence is only reported.
if [[ -z "$LIGAND" ]]; then
  echo "[INFO] No small molecule provided"
fi
################################################################################################################
# get directory where this script is located
BASEDIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# set some variables (all locations are relative to this script's directory)
MASIF_SOURCE="$BASEDIR/masif/source/"
MASIF_SEED_SOURCE="$BASEDIR/masif_seed_search/source/"
MASIF_DATA="$BASEDIR/masif/data/"
MASIF_TARGETS_DIR="$BASEDIR/masif_seed_search/data/masif_targets"

# Create the output directory BEFORE resolving it: realpath fails when
# intermediate directories are missing, which would leave OUTDIR empty.
if ! mkdir -p -- "$OUTDIR"; then
  echo "[ERROR] Could not create output directory $OUTDIR"
  exit 1
fi
if ! OUTDIR="$(realpath -- "$OUTDIR")"; then
  echo "[ERROR] Could not resolve output directory"
  exit 1
fi

# Export the ABSOLUTE output directory. The script changes into OUTDIR below,
# so a relative PYTHONPATH entry would no longer point at it afterwards.
export PYTHONPATH=$PYTHONPATH:$MASIF_SOURCE
export PYTHONPATH=$PYTHONPATH:$OUTDIR
export TMPDIR="$OUTDIR/tmp/"

# Move to output directory; abort rather than write into the wrong place.
if ! cd -- "$OUTDIR"; then
  echo "[ERROR] Could not enter output directory $OUTDIR"
  exit 1
fi

# create directories
mkdir -p data_preparation/00-raw_pdbs/
mkdir -p -- "$TMPDIR"

# Link required folders
# (single-operand ln: creates ./nn_models pointing at the target)
ln -sf "$MASIF_TARGETS_DIR/nn_models"
#ln -sf $MASIF_DATA/masif_site/nn_models masif_site_models
#ln -sf $MASIF_DATA/masif_ppi_search/nn_models masif_search_models
# Pipeline: each stage runs only if every previous stage exited with status 0.
# return_code carries the status of the last stage that ran and becomes the
# status of the whole script.

# ./data_prepare_one_spec.sh $PDBFILE $NAME_CHAIN $LIGAND $MOL2FILE
echo "Precomputing features on $PDBFILE"
PPI_PAIR_ID=$NAME_CHAIN
# NAME_CHAIN has the form <PDBID>_<CHAINID>; split it on the underscore.
PDB_ID=$(echo "$NAME_CHAIN" | cut -d"_" -f1)
CHAIN1=$(echo "$NAME_CHAIN" | cut -d"_" -f2)
cp -- "$PDBFILE" "data_preparation/00-raw_pdbs/${PDB_ID}.pdb"

# Optional arguments are passed only when non-empty, in the fixed order
# ligand, SDF, MOL2. An array keeps each value a single argument even if it
# contains whitespace, while empty ones are still omitted entirely.
extract_args=("${PDB_ID}_${CHAIN1}")
if [[ -n "$LIGAND" ]]; then
  extract_args+=("$LIGAND")
fi
if [[ -n "$SDFFILE" ]]; then
  extract_args+=("$SDFFILE")
fi
if [[ -n "$MOL2FILE" ]]; then
  extract_args+=("$MOL2FILE")
fi
python -W ignore "$MASIF_SOURCE/data_preparation/01-pdb_extract_and_triangulate.py" "${extract_args[@]}"
return_code=$?

# Run MaSIF
if [[ $return_code -eq 0 ]]; then
  python "$MASIF_SOURCE/data_preparation/04-masif_precompute.py" masif_site "$PPI_PAIR_ID"
  return_code=$?
fi

if [[ $return_code -eq 0 ]]; then
  python "$MASIF_SOURCE/data_preparation/04-masif_precompute.py" masif_ppi_search "$PPI_PAIR_ID"
  return_code=$?
fi

# ./predict_site.sh $NAME_CHAIN
if [[ $return_code -eq 0 ]]; then
  echo "Running masif site on $PDBFILE"
  python -W ignore "$MASIF_SOURCE/masif_site/masif_site_predict.py" nn_models.all_feat_3l.custom_params "$NAME_CHAIN"
  return_code=$?
fi

# ./color_site.sh $NAME_CHAIN
if [[ $return_code -eq 0 ]]; then
  export PYTHONPATH=$PYTHONPATH:$MASIF_DATA/masif_site/
  python -W ignore "$MASIF_SOURCE/masif_site/masif_site_label_surface.py" nn_models.all_feat_3l.custom_params "$NAME_CHAIN"
  return_code=$?
fi

# ./compute_descriptors.sh $NAME_CHAIN
if [[ $return_code -eq 0 ]]; then
  echo "Computing descriptors"
  export PYTHONPATH=$PYTHONPATH:$MASIF_DATA/masif_ppi_search/
  python "$MASIF_SOURCE/masif_ppi_search/masif_ppi_search_comp_desc.py" nn_models.sc05.all_feat.custom_params "$NAME_CHAIN"
  return_code=$?
fi

# copy files required for seed search
if [[ $return_code -eq 0 ]]; then
  echo "Creating running directory targets/$NAME_CHAIN "
  mkdir -p targets
  cp -r "$MASIF_TARGETS_DIR/targets/template/" "targets/$NAME_CHAIN"
  return_code=$?
fi

# return to previous directory
cd -

# Propagate the pipeline status instead of the status of `cd -`, so callers can
# detect a failed stage. `return` succeeds only when the script is sourced;
# otherwise fall through to `exit`.
return "$return_code" 2>/dev/null || exit "$return_code"