Skip to content

Commit

Permalink
add ict sanitize ligand
Browse files Browse the repository at this point in the history
  • Loading branch information
Brandon Duane Walker authored and misterbrandonwalker committed Jul 31, 2024
1 parent 6d5007e commit ab6bd14
Show file tree
Hide file tree
Showing 21 changed files with 820 additions and 192 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.1-dev1
current_version = 0.1.1-dev2
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
Expand All @@ -24,6 +24,6 @@ replace = version = "{new_version}"

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]
[bumpversion:file:ict.yml]

[bumpversion:file:src/polus/mm/utils/__init__.py]
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
FROM condaforge/mambaforge
# docker build -f Dockerfile -t mrbrandonwalker/sanitize_ligand_tool .

FROM python:3.11-buster

# environment variables defined
ENV EXEC_DIR="/opt/executables"
Expand All @@ -8,20 +10,11 @@ COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}
# need copy src here because pyproject.toml has a reference to the src directory
COPY src ${EXEC_DIR}/src

RUN conda install -c conda-forge rdkit --yes

RUN pip install filepattern

RUN conda init bash

RUN mamba clean --all --yes
WORKDIR ${EXEC_DIR}

ADD Dockerfile ${EXEC_DIR}

COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir
# Default command. Additional arguments are provided through the command line
ENTRYPOINT ["python3", "-m", "polus.mm.utils"]
CMD ["--help"]
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sanitize Ligand (0.1.1-dev1)
# Sanitize Ligand (0.1.1-dev2)

Handle molecules with rdkit errors gracefully.

Expand Down
1 change: 1 addition & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.1-dev2
25 changes: 25 additions & 0 deletions utils/docking/diffdock/sanitize-ligand-tool/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# ICT manifest for the Sanitize Ligand tool.
specVersion: 0.1.0
name: labshare/sanitize-ligand
version: 0.1.1-dev2
container: polusai/sanitize-ligand-tool:0.1.1-dev2
entrypoint: ""
title: Sanitize Ligand
description: Handle molecules with rdkit errors gracefully.
author: Brandon Walker ([email protected])
repository: https://github.com/labshare/polus-plugins
documentation: https://ncats.nih.gov/preclinical/core/informatics
citation: ""
# NOTE(review): the inputs/outputs below (pattern, indir, outdir) differ from
# the single --input_small_mol_ligand option exposed by the package CLI in
# this commit -- confirm whether this manifest should be updated to match.
inputs:
  - name: pattern
    required: true
    description: Filepattern to parse files
    type: string
  - name: indir
    required: true
    description: Input collection.
    type: collection
outputs:
  - name: outdir
    required: false
    description: Output collection.
    type: collection
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
[tool.poetry]
name = "mm-polus-sanitize-ligand"
version = "0.1.1-dev1"
version = "0.1.1-dev2"
description = "Handle molecules with rdkit errors gracefully."
authors = ["Brandon Walker <[email protected]>"]
readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.8"
python = ">=3.9,<3.12"
typer = "^0.7.0"
rdkit = "*"
rdkit = "2024.3.3"
sophios = "0.1.1"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Sanitize input ligand

doc: |-
  Sanitize input ligand

baseCommand: ["python", "-m", "polus.mm.utils"]

hints:
  DockerRequirement:
    dockerPull: mrbrandonwalker/sanitize_ligand_tool@sha256:6c272a427377e05ed2a83da5cdb3c00678b74b05c248f56c78d2349201f34d2f

requirements:
  InlineJavascriptRequirement: {}
  InitialWorkDirRequirement: # conditionally overwrite the input ligand, otherwise cwltool will symlink to the original
    listing:
      - entry: $(inputs.input_small_mol_ligand)
        writable: true

inputs:

  input_small_mol_ligand:
    type: File
    format: edam:format_3814
    inputBinding:
      prefix: --input_small_mol_ligand

  # The two optional string inputs below have no inputBinding, so they never
  # reach the command line.
  # NOTE(review): presumably placeholders used by the workflow tooling -- confirm.
  output_ligand:
    type: string?

  valid_ligand:
    type: string?

outputs:

  output_ligand:
    type: File
    format: edam:format_3814
    outputBinding:
      glob: "*.sdf"

  valid_ligand:
    type: boolean
    outputBinding:
      glob: valid.txt
      loadContents: true
      # CWL expressions are ECMAScript 5.1, so declare locals with `var`
      # rather than the ES2015 `const`.
      outputEval: |
        ${
          // Read the contents of the file
          var lines = self[0].contents.split("\n");
          // Read boolean value from the first line
          var valid = lines[0].trim() === "True";
          return valid;

        }

$namespaces:
  edam: https://edamontology.org/

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""sanitize_ligand."""

__version__ = "0.1.1-dev1"
__version__ = "0.1.1-dev2"

from polus.mm.utils.sanitize_ligand import ( # pylint: disable=unused-import
sanitize_ligand,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Package entrypoint for the sanitize_ligand package."""

# Base packages
import logging
from os import environ
from pathlib import Path

import typer
from polus.mm.utils.sanitize_ligand import sanitize_ligand

# Configure the root logger's record and timestamp formats for the CLI.
logging.basicConfig(
    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)
# The POLUS_LOG environment variable names the log level (default "INFO").
# Normalise the name and fall back to INFO for anything that is not an
# integer level constant on the logging module, so a typo or a lower-case
# value cannot crash the program at import time.
POLUS_LOG = getattr(
    logging,
    environ.get("POLUS_LOG", "INFO").strip().upper(),
    None,
)
if not isinstance(POLUS_LOG, int):
    POLUS_LOG = logging.INFO
logger = logging.getLogger("polus.mm.utils.sanitize_ligand")
logger.setLevel(POLUS_LOG)

# Typer application object; `main` below is registered as its command.
app = typer.Typer(help="Sanitize Ligand.")


@app.command()
def main(
    input_small_mol_ligand: Path = typer.Option(
        ...,
        "--input_small_mol_ligand",
        help="Input input_small_mol_ligand to be processed.",
        # Validate at the CLI boundary so a missing path or a directory is
        # reported as a usage error instead of failing later in the reader.
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
) -> None:
    """Sanitize Ligand."""
    # The docstring is left as-is: Typer shows it as the command's help text.
    # `input_small_mol_ligand` is the required ligand file; it is logged and
    # handed to `sanitize_ligand`. Lazy %-style arguments avoid formatting
    # the message when INFO is disabled.
    logger.info("input_small_mol_ligand: %s", input_small_mol_ligand)
    sanitize_ligand(input_small_mol_ligand)


if __name__ == "__main__":
    app()
Original file line number Diff line number Diff line change
Expand Up @@ -162,36 +162,28 @@ def attempt_fix_ligand(
return valid_lig, molecule


def sanitize_ligand(input_small_mol_ligand: Path) -> None:
    """Sanitize a single ligand file and record whether it is usable.

    The first record of the input SD file is read with ``sanitize=False``
    and ``removeHs=False``, checked with ``is_valid_ligand`` and, when that
    check fails, handed to ``attempt_fix_ligand``. Two files are written
    relative to the current working directory:

    * a file with the same name as the input, containing the resulting
      molecule -- only when the ligand ends up valid;
    * ``valid.txt``, containing ``str()`` of the validity flag.

    An empty file, or a first record that the reader returns as ``None``,
    is reported as invalid instead of raising.

    Args:
        input_small_mol_ligand: Ligand file
    """
    output_small_mol_ligand = Path(input_small_mol_ligand.name)

    # RDKit's file APIs are given plain strings rather than Path objects.
    supplier = Chem.SDMolSupplier(
        str(input_small_mol_ligand.resolve()),
        sanitize=False,
        removeHs=False,
    )
    # Guard the index: an SD file with no records would raise IndexError.
    mol = supplier[0] if len(supplier) > 0 else None
    # Drop the reader before writing; the output path can be the very file
    # that was just read when the input lives in the working directory.
    del supplier

    if mol is None:
        # Nothing to validate or repair.
        valid_ligand = False
        rdkit_mol = None
    else:
        valid_ligand = is_valid_ligand(mol)
        if valid_ligand:
            rdkit_mol = mol
        else:
            valid_ligand, rdkit_mol = attempt_fix_ligand(mol)

    if valid_ligand:
        with Chem.SDWriter(str(output_small_mol_ligand)) as w:
            w.write(rdkit_mol)

    # Always record the outcome, including for invalid ligands.
    with Path("valid.txt").open("w", encoding="utf-8") as f:
        f.write(str(valid_ligand))
Loading

0 comments on commit ab6bd14

Please sign in to comment.