Skip to content

Commit

Permalink
refactor sanitize_ligand
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and misterbrandonwalker committed Aug 6, 2024
1 parent f8a4d5c commit 076f9ad
Show file tree
Hide file tree
Showing 25 changed files with 274 additions and 201 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.1-dev1
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.sdf filter=lfs diff=lfs merge=lfs -text
File renamed without changes.
25 changes: 25 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# docker build -f Dockerfile -t polusai/sanitize-ligand-tool .
FROM condaforge/mambaforge

# EXEC_DIR is where the project is copied before installation;
# POLUS_LOG is the log-level name read by the package entrypoint at startup.
ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

# Project metadata files copied next to the sources before installation.
COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

# Install needed packages here

COPY src ${EXEC_DIR}/src

# Keep a copy of this Dockerfile inside the image. COPY is used instead of ADD
# because this is a plain local file (no URL fetch or archive extraction needed).
COPY Dockerfile .

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# NOTE(review): no ENTRYPOINT is declared in this file, so "--help" is only an
# argument if the base image supplies an entrypoint; the CWL tool passes its own
# command ("python -m polus.mm.utils.sanitize_ligand") -- confirm this is intended.
CMD ["--help"]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sanitize Ligand (0.1.1-dev1)
# Sanitize Ligand (0.1.0)

Handle molecules with rdkit errors gracefully.

Expand Down
1 change: 1 addition & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the sanitize-ligand-tool image, tagged with the version stored in ./VERSION.

# Abort on the first error (e.g. a missing VERSION file) instead of going on to
# run `docker build` with an empty tag.
set -euo pipefail

version=$(<VERSION)
# Quote the tag so an unexpected character in VERSION cannot split the argument.
docker build . -t "polusai/sanitize-ligand-tool:${version}"
9 changes: 9 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
name: project_env
channels:
- conda-forge
dependencies:
- python==3.10
- rdkit==2024.03.1
- pytest==8.1.1
- cwltool==3.1.20240404144621
- cwl-utils==0.33
36 changes: 36 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
specVersion: "0.1.0"
name: sanitize_ligand
version: 0.1.0
container: sanitize-ligand-tool
entrypoint:
title: sanitize_ligand
description: Sanitize input ligand
author: Brandon Walker, Nazanin Donyapour
contact: [email protected], [email protected]
repository:
documentation:
citation:

inputs:
- name: input_small_mol_ligand
required: true
description:
type: File
format:
uri: edam:format_3814
outputs:
- name: output_ligand
required: true
description:
type: File
format:
uri: edam:format_3814
- name: valid_ligand
required: true
description:
type: boolean
ui:
- key: inputs.input_small_mol_ligand
title: "input_small_mol_ligand: "
description: ""
type: File
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
[tool.poetry]
name = "mm-polus-sanitize-ligand"
version = "0.1.1-dev1"
version = "0.1.0"
description = "Handle molecules with rdkit errors gracefully."
authors = ["Brandon Walker <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.8"
python = ">=3.9,<3.12"
typer = "^0.7.0"
rdkit = "*"
sophios = "0.1.1"
rdkit = "2024.3.3"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Sanitize input ligand

doc: |-
Sanitize input ligand

baseCommand: ["python", "-m", "polus.mm.utils.sanitize_ligand"]

hints:
DockerRequirement:
dockerPull: polusai/sanitize-ligand-tool@sha256:926e501300fa5b940c250347cf346cdcacf21944469ed82c3788a58e0957c18d

requirements:
InlineJavascriptRequirement: {}
InitialWorkDirRequirement: # conditionally overwrite the input ligand, otherwise cwltool will symlink to the original
listing:
- $(inputs.input_small_mol_ligand)

inputs:

input_small_mol_ligand:
type: File
format:
- edam:format_3814
inputBinding:
prefix: --input_small_mol_ligand

outputs:

output_ligand:
type: File
format: edam:format_3814
outputBinding:
glob: "*.sdf"

valid_ligand:
type: boolean
outputBinding:
glob: valid.txt
loadContents: true
outputEval: |
${
// Read the contents of the file
const lines = self[0].contents.split("\n");
// Read boolean value from the first line
const valid = lines[0].trim() === "True";
return valid;

}

stderr:
type: File
outputBinding:
glob: stderr

stdout:
type: File
outputBinding:
glob: stdout

stderr: stderr

stdout: stdout

$namespaces:
edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""sanitize_ligand."""

__version__ = "0.1.1-dev1"
__version__ = "0.1.0"

from polus.mm.utils.sanitize_ligand import ( # pylint: disable=unused-import
sanitize_ligand,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Package entrypoint for the sanitize_ligand package."""

# Base packages
import logging
from os import environ

import typer
from polus.mm.utils.sanitize_ligand import sanitize_ligand

# Configure the root logger's message layout and timestamp format.
logging.basicConfig(
    datefmt="%d-%b-%y %H:%M:%S",
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
)

# The POLUS_LOG environment variable holds a logging level *name*
# (defaulting to "INFO"); look up the matching attribute on `logging`.
POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))

# Package-level logger, set to the level resolved above.
logger = logging.getLogger("polus.mm.utils.sanitize_ligand")
logger.setLevel(POLUS_LOG)

app = typer.Typer(help="Sanitize Ligand.")


@app.command()
def main(
    input_small_mol_ligand: str = typer.Option(
        ...,
        "--input_small_mol_ligand",
        help="Input input_small_mol_ligand to be processed.",
    ),
) -> None:
    """Sanitize Ligand."""
    # The docstring above is kept short on purpose: Typer shows it as the
    # command's help text.
    #
    # Imported locally so this function stays self-contained.
    from pathlib import Path

    logger.info("input_small_mol_ligand: %s", input_small_mol_ligand)

    # `sanitize_ligand` takes the ligand as a Path (it uses `.name` and
    # `.resolve()`) together with an output directory. Results are written to
    # the current working directory, which is where the CWL tool globs for
    # "*.sdf" and "valid.txt".
    ligand_path = Path(input_small_mol_ligand)
    sanitize_ligand(ligand_path, Path.cwd())


# Start the Typer application only when this module is executed directly
# (e.g. via `python -m`), not when it is imported.
if __name__ == "__main__":
    app()
Original file line number Diff line number Diff line change
Expand Up @@ -163,35 +163,31 @@ def attempt_fix_ligand(


def sanitize_ligand(
ligand_files: list[Path],
input_small_mol_ligand: Path,
outdir: Path,
) -> None:
"""Sanitize ligand file.
Args:
ligand_files: Ligand file pattern
input_small_mol_ligand: Ligand file
outdir: Output directory
"""
for input_small_mol_ligand in ligand_files:
output_small_mol_ligand = outdir / Path(input_small_mol_ligand.name)
mol: Chem.SDMolSupplier = Chem.SDMolSupplier(
input_small_mol_ligand.resolve(),
sanitize=False,
removeHs=False,
)[0]

valid_ligand = is_valid_ligand(mol)
if not valid_ligand:
valid_ligand, rdkit_mol = attempt_fix_ligand(mol)
else:
rdkit_mol = mol

if valid_ligand:
with Chem.SDWriter(output_small_mol_ligand) as w:
w.write(rdkit_mol)

if len(ligand_files) == 1:
# if scattering with many files
# let the presence of the file indicate validity
with outdir.joinpath("valid.txt").open("w", encoding="utf-8") as f:
f.write(str(valid_ligand))
output_small_mol_ligand = outdir / Path(input_small_mol_ligand.name)
mol: Chem.SDMolSupplier = Chem.SDMolSupplier(
input_small_mol_ligand.resolve(),
sanitize=False,
removeHs=False,
)[0]

valid_ligand = is_valid_ligand(mol)
if not valid_ligand:
valid_ligand, rdkit_mol = attempt_fix_ligand(mol)
else:
rdkit_mol = mol

if valid_ligand:
with Chem.SDWriter(output_small_mol_ligand) as w:
w.write(rdkit_mol)

with outdir.joinpath("valid.txt").open("w", encoding="utf-8") as f:
f.write(str(valid_ligand))
Git LFS file not shown
59 changes: 59 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/tests/test_sanitize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Test the sanitize_ligand plugin."""
from pathlib import Path

import pytest
from polus.mm.utils.sanitize_ligand import attempt_fix_ligand
from rdkit import Chem
from sophios.api.pythonapi import Step
from sophios.api.pythonapi import Workflow


@pytest.mark.catch_error()
def test_kekulization_error_catch() -> None:
    """Check that a ligand which cannot be kekulized is reported as invalid.

    RDKit error exercised:
    "Can't kekulize mol. Unkekulized atoms: 6 7 8 9 10."
    """
    smiles = "c1ccc(cc1)-c1nnc(n1)-c1ccccc1"
    # NOTE(review): unlike the explicit-valence test, sanitize=False is not
    # passed here, so MolFromSmiles may already return None for this SMILES --
    # confirm that attempt_fix_ligand is meant to receive that value.
    ligand = Chem.MolFromSmiles(smiles)
    is_valid, _ = attempt_fix_ligand(ligand)
    assert not is_valid


@pytest.mark.fix_ligand()
def test_fix_explicit_valence_error() -> None:
    """Check that an explicit-valence problem is repaired.

    RDKit error exercised:
    "Explicit valence for atom # 1 C, 5, is greater than permitted"
    """
    smiles = "c1c(ccc2NC(CN=c(c21)(C)C)=O)O"
    # Parse without sanitization so the problematic molecule object is still
    # created and can be handed to the fixer.
    ligand = Chem.MolFromSmiles(smiles, sanitize=False)
    is_valid, _ = attempt_fix_ligand(ligand)
    assert is_valid


def test_sanitize_ligand_cwl() -> None:
    """Run the sanitize_ligand CWL tool on a sample ligand and check its output.

    The tool is wrapped in a single-step workflow, executed, and the test then
    verifies that an SDF file named after the input ligand was produced under
    the ``outdir`` directory.
    """
    tests_dir = Path(__file__).resolve().parent

    # NOTE(review): the file name literal was corrupted by e-mail obfuscation
    # ("[email protected]"); restored here as the tool version 0.1.0 with "@" in
    # place of "." -- confirm against the CWL file shipped next to tests/.
    cwl_file = tests_dir.parent / "sanitize_ligand_0@1@0.cwl"

    input_ligand_path = tests_dir / "4xk9_ligand.sdf"

    sanitize_ligand = Step(clt_path=cwl_file)
    sanitize_ligand.input_small_mol_ligand = input_ligand_path

    steps = [sanitize_ligand]
    filename = "sanitize_ligand"
    viz = Workflow(steps, filename)

    viz.run()

    # Outputs are searched for recursively under "outdir", relative to the
    # current working directory.
    outdir = Path("outdir")
    output_files = list(outdir.rglob("*.sdf"))

    assert output_files, "No output SDF files were generated."

    # Check if the input SDF filename is part of the output
    input_filename = input_ligand_path.name
    output_filenames = [f.name for f in output_files]
    assert any(
        input_filename in of for of in output_filenames
    ), f"The input SDF file '{input_filename}' was not found in the output."
27 changes: 0 additions & 27 deletions utils/sanitize-ligand-plugin/Dockerfile

This file was deleted.

1 change: 0 additions & 1 deletion utils/sanitize-ligand-plugin/VERSION

This file was deleted.

4 changes: 0 additions & 4 deletions utils/sanitize-ligand-plugin/build-docker.sh

This file was deleted.

Loading

0 comments on commit 076f9ad

Please sign in to comment.