Skip to content

Commit

Permalink
Update network architecture
Browse files Browse the repository at this point in the history
  • Loading branch information
Isomorpfishm committed Oct 16, 2023
1 parent 5de8ca5 commit 1873daa
Show file tree
Hide file tree
Showing 47 changed files with 7,244 additions and 303 deletions.
18 changes: 12 additions & 6 deletions MakeGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str,
default='./config/config.yml')
default='./config/train.yml')
parser.add_argument('--outdir', type=str,
default='./dataset/crossdocked_graph10_v2')
default='./dataset/crossdocked_graph10_v3')
args = parser.parse_args()


# Logging
log_dir = args.outdir
logger = get_logger('feature_extract_log', log_dir)
Expand All @@ -56,6 +57,7 @@
os.makedirs(args.outdir)
# The f-prefix is required so that {args.outdir} is interpolated; without it the placeholder text is logged literally.
logger.info(f"Output directory {args.outdir} is created")


# Load config
outDirExists = os.path.isfile(args.config)
if not outDirExists:
Expand All @@ -67,6 +69,7 @@
split_dict = torch.load(config.dataset.split)
logger.info(f"Found {len(split_dict['train'])} samples in the Crossdock dataset for training")


# Docking ligands with Autodock Vina implemented in ODDT
logger.info("Extracting features...")
for i in tqdm(range(len(split_dict['train']))):
Expand All @@ -80,13 +83,14 @@
logger.info(f"Output directory {name.split('/')[0]} is created")

logger.info(f"Now reading {name}")
proteinDual = StructureDual(os.path.join(config.dataset.path, split_dict['train'][i][0]), isProtein=True)
ligandDual = StructureDual(os.path.join(config.dataset.path, split_dict['train'][i][1]), isProtein=False)
proteinDual = StructureDual(os.path.join(config.featuriser.data, split_dict['train'][i][0]), isProtein=True)
ligandDual = StructureDual(os.path.join(config.featuriser.data, split_dict['train'][i][1]), isProtein=False)

try:
protein, _protein = proteinDual.parse_to_oddt(), proteinDual.parse_to_rdkit()
ligand, _ligand = ligandDual.parse_to_oddt(), ligandDual.parse_to_rdkit()
ligand_com = parse_sdf_file(os.path.join(config.dataset.path, split_dict['train'][i][1]))['center_of_mass']
ligand_data = parse_sdf_file(os.path.join(config.featuriser.data, split_dict['train'][i][1]))
ligand_com = ligand_data['center_of_mass']
except Exception as e:
logger.error(traceback.format_exc())
skippedComplex.append(name)
Expand All @@ -102,6 +106,7 @@
num_modes=int(config.autodock.num_modes),
executable=config.autodock.executable)


# Extracting vina score of native structure
vina_score = float(vina.predict_ligand(ligand).data['vina_affinity'])
protein_cc, ligand_cc = close_contacts(x=protein.atom_dict,
Expand All @@ -126,7 +131,8 @@
list_atom_name = proteinDual.RetrieveAtomNames()

g = create_pyg_graph(protein=protein,
ligand=ligand,
ligand=ligand,
ligand_data=ligand_data,
cutoff=config.featuriser.interaction_cutoff,
list_atom_name=list_atom_name,
name=name,
Expand Down
35 changes: 21 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@ Working inside a Conda virtual environment is highly encouraged, but not necess
conda env create -f environment.yml
```

Otherwise, install the following dependencies sequentially.
Otherwise, install the following packages and dependencies sequentially:

```
conda create -n SINGA python=3.10
conda activate SINGA
conda install -c conda-forge openbabel
conda install mkl-service
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
pip install torch_geometric==2.3.1
pip install torch-scatter==2.1.1 torch-sparse==0.6.17
pip install torch_geometric==2.3.1 torch-scatter==2.1.1 torch-sparse==0.6.17 torch-cluster==1.6.3
pip install biopandas==0.4.1 pytorch-lightning==2.0.6
pip install rdkit==2023.3.3
pip install oddt==0.7
pip install easydict==1.10
pip install e3nn==0.5.1
pip install rdkit==2023.3.3 oddt==0.7
pip install e3nn==0.5.1 dgl==1.1.2
pip install tensorboard==2.14.1
pip install pybel==0.15.5
pip install termcolor==2.3.0 easydict==1.10
```

## Directory tree
Expand All @@ -49,35 +50,41 @@ pip install e3nn==0.5.1
|__ /ckpt
|__ /config
|__ /dataset
|__ /crossdocked_graph10_v2
|__ /crossdocked_graph10_v3
|__ /example
|__ /features
|__ /img
|__ /logs
|__ /model
|__ Discriminator.py
|__ EF_embedding.py
|__ __init__.py
|__ BeamSearch.py
|__ CProMG.py
|__ EF_layers.py
|__ Embedding.py
|__ GAN.py
|__ Generator.py
|__ Masking.py
|__ Jd.pt
|__ /output
|__ /utils
|__ /ledock
|__ __init__.py
|__ Data.py
|__ Featuriser.py
|__ fpscores.pkl.gz
|__ gen.py
|__ misc.py
|__ PLFeature.py
|__ PLIExtension.py
|__ PLInteraction.py
|__ PLParser.py
|__ redirect.py
|__ SAScorer.py
|__ Stopper.py
|__ .gitignore
|__ __init__.py
|__ environment.yml
|__ LICENSE
|__ MakeGraph.py
|__ README.md
|__ gen.py
|__ train.py
```

Expand All @@ -87,4 +94,4 @@ MIT License

## Acknowledgement

Part of this codebase is adapted from [EquiformerV2](https://github.com/atomicarchitects/equiformer_v2) and [HGScore](https://github.com/KevinCrp/HGScore). Details of the adaptation are stated explicitly in the script.
Part of this codebase is adapted from [EquiformerV2](https://github.com/atomicarchitects/equiformer_v2) and [CProMG](https://github.com/lijianing0902/CProMG). Details of the adaptation are stated explicitly in the script.
18 changes: 0 additions & 18 deletions config/config.yml

This file was deleted.

187 changes: 185 additions & 2 deletions config/train.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
autodock:
executable: ./autodock_vina/bin/vina
num_modes: 9
seed: 1

featuriser:
data: ./dataset/crossdocked_pocket10
interaction_cutoff: 4.0
sasa_cutoff: 4.0
symbol_radius_path: ./config/symbol_radius.json

general:
MAX_INT: 2147483647
seed: 1

dataset:
path: ./dataset/crossdocked_graph10_v2
name: pl
path: ./dataset/crossdocked_graph10_v3
split: ./dataset/split_by_name.pt
atomic_distance_cutoff: 4.0

dataloader:
atomic_distance_cutoff: 4.0
Expand Down Expand Up @@ -35,3 +50,171 @@ embedding:

model:
name: SINGA
hidden_channels: 256
featurizer_feat_dim: 784
encoder:
name: cftfm
hidden_channels: 256
edge_channels: 64
key_channels: 128
num_heads: 4
num_interactions: 6
cutoff: 10.0
knn: 48 # kNN Graph
lap_dim: 8
decoder:
tgt_len: 200
hidden_channels: 256
key_channels: 128
num_heads: 4
num_interactions: 6
padding: "right"
smiVoc: [
"#",
"$",
"&",
"(",
")",
"-",
"/",
".",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"=",
"B",
"Br",
"C",
"Cl",
"F",
"I",
"N",
"O",
"P",
"S",
"[125I]",
"[18F]",
"[2H]",
"[3H]",
"[AlH2]",
"[As]",
"[Au]",
"[B-]",
"[C-]",
"[C@@H]",
"[C@@]",
"[C@H]",
"[C@]",
"[CH-]",
"[Cr]",
"[Fe--]",
"[Fe@@]",
"[Fe@]",
"[Fe]",
"[Hg]",
"[K]",
"[Li]",
"[Mg]",
"[MgH2]",
"[Mo]",
"[N+]",
"[N-]",
"[N@+]",
"[N@@+]",
"[N@@]",
"[N@H+]",
"[N@]",
"[NH+]",
"[NH-]",
"[NH2+]",
"[NH3+]",
"[N]",
"[Na]",
"[O+]",
"[O-]",
"[OH+]",
"[O]",
"[P+]",
"[P@@]",
"[P@]",
"[PH]",
"[P]",
"[Pd]",
"[Re]",
"[Ru@@]",
"[Ru]",
"[S+]",
"[S-]",
"[S@+]",
"[S@@+]",
"[S@@H]",
"[S@@]",
"[S@H]",
"[S@]",
"[SH]",
"[Sc]",
"[S]",
"[Sb]",
"[SeH]",
"[Se]",
"[Si]",
"[SnH]",
"[Sn]",
"[V]",
"[Zn++]",
"[c-]",
"[n+]",
"[n-]",
"[nH+]",
"[nH]",
"[o+]",
"[s+]",
"[se]",
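"[V]",  # NOTE(review): duplicate of the "[V]" entry earlier in this list; confirm whether intentional before removing, since it affects vocabulary size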
"[W]",
"[Zn]",
"\\",
"^",
"c",
"n",
"o",
"p",
"s",
]

train:
seed: 2022
batch_size: 64
num_workers: 4
max_iters: 3
val_freq: 1000
pos_noise_std: 0.1
max_grad_norm: .inf
num_props: 3
prop:
- vina_score
- qed
- sas
optimizer:
type: adam
lr: 1.e-4
weight_decay: 0
beta1: 0.99
beta2: 0.999
scheduler:
type: plateau
factor: 0.6
patience: 5
min_lr: 1.e-5

generate:
prop:
- 1
- 1
- 1
Loading

0 comments on commit 1873daa

Please sign in to comment.