From b26e2355ad06cf8de7e7c0c9af98d8d64040b389 Mon Sep 17 00:00:00 2001 From: Nazanin Donyapour Date: Thu, 25 Apr 2024 18:21:37 -0400 Subject: [PATCH] autodock-vina-filter plugin --- .../.bumpversion.cfg | 29 ++ .../autodock-vina-filter-plugin/.dockerignore | 4 + .../.gitattributes | 3 + utils/autodock-vina-filter-plugin/.gitignore | 1 + .../autodock-vina-filter-plugin/CHANGELOG.md | 5 + utils/autodock-vina-filter-plugin/Dockerfile | 22 ++ utils/autodock-vina-filter-plugin/README.md | 26 ++ utils/autodock-vina-filter-plugin/VERSION | 1 + .../autodock_vina_filter_0@1@0.cwl | 315 ++++++++++++++++++ .../build-docker.sh | 4 + utils/autodock-vina-filter-plugin/ict.yml | 154 +++++++++ .../pyproject.toml | 30 ++ .../mm/utils/autodock_vina_filter/__init__.py | 7 + .../mm/utils/autodock_vina_filter/__main__.py | 85 +++++ .../autodock_vina_filter.py | 159 +++++++++ .../tests/1e3g.pdbqt | 3 + .../tests/1e3g_protein.pdb | 3 + .../tests/__init__.py | 1 + .../tests/binding_data.txt | 3 + .../tests/test_autodock_vina_filter.py | 79 +++++ .../tests/vina.log | 40 +++ 21 files changed, 974 insertions(+) create mode 100644 utils/autodock-vina-filter-plugin/.bumpversion.cfg create mode 100644 utils/autodock-vina-filter-plugin/.dockerignore create mode 100644 utils/autodock-vina-filter-plugin/.gitattributes create mode 100644 utils/autodock-vina-filter-plugin/.gitignore create mode 100644 utils/autodock-vina-filter-plugin/CHANGELOG.md create mode 100644 utils/autodock-vina-filter-plugin/Dockerfile create mode 100644 utils/autodock-vina-filter-plugin/README.md create mode 100644 utils/autodock-vina-filter-plugin/VERSION create mode 100644 utils/autodock-vina-filter-plugin/autodock_vina_filter_0@1@0.cwl create mode 100644 utils/autodock-vina-filter-plugin/build-docker.sh create mode 100644 utils/autodock-vina-filter-plugin/ict.yml create mode 100644 utils/autodock-vina-filter-plugin/pyproject.toml create mode 100644 utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__init__.py create mode 100644 utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__main__.py create mode 100644 utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/autodock_vina_filter.py create mode 100644 utils/autodock-vina-filter-plugin/tests/1e3g.pdbqt create mode 100644 utils/autodock-vina-filter-plugin/tests/1e3g_protein.pdb create mode 100644 utils/autodock-vina-filter-plugin/tests/__init__.py create mode 100644 utils/autodock-vina-filter-plugin/tests/binding_data.txt create mode 100644 utils/autodock-vina-filter-plugin/tests/test_autodock_vina_filter.py create mode 100644 utils/autodock-vina-filter-plugin/tests/vina.log diff --git a/utils/autodock-vina-filter-plugin/.bumpversion.cfg b/utils/autodock-vina-filter-plugin/.bumpversion.cfg new file mode 100644 index 00000000..94052cb3 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/.bumpversion.cfg @@ -0,0 +1,29 @@ +[bumpversion] +current_version = 0.1.0 +commit = False +tag = False +parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:VERSION] + +[bumpversion:file:README.md] + +[bumpversion:file:plugin.json] + +[bumpversion:file:src/polus/mm/utils/autodock_vina_filter/__init__.py] diff --git a/utils/autodock-vina-filter-plugin/.dockerignore b/utils/autodock-vina-filter-plugin/.dockerignore new file mode 100644 index 00000000..7c603f81 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/.dockerignore @@ -0,0 +1,4 @@ +.venv +out +tests +__pycache__ diff --git a/utils/autodock-vina-filter-plugin/.gitattributes b/utils/autodock-vina-filter-plugin/.gitattributes new file mode 100644 index 00000000..6612963f --- /dev/null +++ b/utils/autodock-vina-filter-plugin/.gitattributes @@ -0,0 +1,3 @@ +*.pdb filter=lfs diff=lfs merge=lfs -text +*.pdbqt filter=lfs diff=lfs merge=lfs -text +*.txt filter=lfs diff=lfs merge=lfs -text diff --git a/utils/autodock-vina-filter-plugin/.gitignore b/utils/autodock-vina-filter-plugin/.gitignore new file mode 100644 index 00000000..c04bc49f --- /dev/null +++ b/utils/autodock-vina-filter-plugin/.gitignore @@ -0,0 +1 @@ +poetry.lock diff --git a/utils/autodock-vina-filter-plugin/CHANGELOG.md b/utils/autodock-vina-filter-plugin/CHANGELOG.md new file mode 100644 index 00000000..b67793f7 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/CHANGELOG.md @@ -0,0 +1,5 @@ +# CHANGELOG + +## 0.1.0 + +Initial release. diff --git a/utils/autodock-vina-filter-plugin/Dockerfile b/utils/autodock-vina-filter-plugin/Dockerfile new file mode 100644 index 00000000..e8ac9249 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/Dockerfile @@ -0,0 +1,22 @@ +FROM condaforge/mambaforge + +ENV EXEC_DIR="/opt/executables" +ENV POLUS_LOG="INFO" +RUN mkdir -p ${EXEC_DIR} + + +# Work directory defined in the base container +# WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY CHANGELOG.md ${EXEC_DIR} + +# Install needed packages here + +COPY src ${EXEC_DIR}/src + +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +CMD ["--help"] diff --git a/utils/autodock-vina-filter-plugin/README.md b/utils/autodock-vina-filter-plugin/README.md new file mode 100644 index 00000000..31ac5b41 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/README.md @@ -0,0 +1,26 @@ +# autodock_vina_filter (0.1.0) + +Filters results of the AutoDock Vina software. + +## Options + +This plugin takes 11 input arguments and 5 output argument: + +| Name | Description | I/O | Type | Default | +|---------------|-------------------------|--------|--------|---------| +| input_log_path | Path to the log file, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log | Input | File | File | +| input_log_paths | Path to the log files, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log | Input | File[] | File[] | +| docking_score_cutoff | Cutoff threshold for filtering docking scores, Type: float | Input | float | float | +| max_num_poses_per_ligand | Maximum number of poses per initial ligand, Type: int | Input | int | int | +| max_num_poses_total | Maximum number of poses total, Type: int | Input | int | int | +| input_txt_path | Experimental binding free energy data file (if any) | Input | File | File | +| rescore | Use True if autodock vina was run with --rescore | Input | boolean | boolean | +| input_ligand_pdbqt_path | Path to the input PDBQT ligands, Type: string, File type: input, Accepted formats: pdbqt, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/vina/vina_ligand.pdbqt | Input | {'type': 'array', 'items': {'type': 'array', 'items': 'File'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'File'}} | +| output_ligand_pdbqt_path | Path to the output PDBQT ligand files, Type: string, File type: output, Accepted formats: pdbqt, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.pdbqt | Input | string | string | +| input_receptor_pdbqt_path | Path to the input PDBQT receptors, Type: string, File type: input, Accepted formats: pdbqt, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/vina/vina_ligand.pdbqt | Input | {'type': 'array', 'items': {'type': 'array', 'items': 'File'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'File'}} | +| output_receptor_pdbqt_path | Path to the output PDBQT receptor files, Type: string, File type: output, Accepted formats: pdbqt, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.pdbqt | Input | string | string | +| output_ligand_pdbqt_path | Path to the output PDBQT file | Output | File[] | File[] | +| output_receptor_pdbqt_path | Path to the output PDBQT file | Output | File[] | File[] | +| docking_scores | Estimated Free Energies of Binding | Output | {'type': 'array', 'items': 'float'} | {'type': 'array', 'items': 'float'} | +| experimental_binding_data | Experimental binding data (if any) | Output | ['null', {'type': 'array', 'items': 'float'}] | ['null', {'type': 'array', 'items': 'float'}] | +| experimental_dGs | Experimental binding free energy dG values (if any) | Output | ['null', {'type': 'array', 'items': 'float'}] | ['null', {'type': 'array', 'items': 'float'}] | diff --git a/utils/autodock-vina-filter-plugin/VERSION b/utils/autodock-vina-filter-plugin/VERSION new file mode 100644 index 00000000..6e8bf73a --- /dev/null +++ b/utils/autodock-vina-filter-plugin/VERSION @@ -0,0 +1 @@ +0.1.0 diff --git a/utils/autodock-vina-filter-plugin/autodock_vina_filter_0@1@0.cwl b/utils/autodock-vina-filter-plugin/autodock_vina_filter_0@1@0.cwl new file mode 100644 index 00000000..a3090946 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/autodock_vina_filter_0@1@0.cwl @@ -0,0 +1,315 @@ + +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 + +class: CommandLineTool + +label: Filters results of the AutoDock Vina software. + +doc: |- + This class applies a cutoff to the docking scores of the AutoDock Vina software. + +baseCommand: ["python3", "-m", "polus.mm.utils.autodock_vina_filter"] + +hints: + DockerRequirement: + dockerPull: polusai/autodock-vina-filter-tool@sha256:ca69ac31ed3733d544abc6424646a9b508e6e886a7a08ffef7a902490b7fca61 + +requirements: + InlineJavascriptRequirement: {} + +inputs: +# NOTE: To make inference work, at least one of the following two log inputs needs to be non-optional. +# However, since there is no way in CWL (I think) to make inputs mutually exclusive, we would need to +# supply fake default values in the form of anonymous Files and then perform a check in the script. +# However, there is (at least one) bug in cwltool related to anonymous files (files that start with _:...) + Docker, etc. + + input_log_path: + label: Path to the log file + doc: |- + Path to the log file + Type: string + File type: output + Accepted formats: log + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log + type: File? + format: edam:format_2330 + inputBinding: + position: 2 + prefix: --input_log_path + #default: {class: File, basename: nonexistent_logfile.log, contents: "", format: edam:format_2330} + + input_log_paths: + label: Path to the log files + doc: |- + Path to the log files + Type: string + File type: output + Accepted formats: log + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log + type: File[]? + format: edam:format_2330 + inputBinding: + position: 2 + prefix: --input_log_paths + #default: [{class: File, basename: nonexistent_logfile.log, contents: "", format: edam:format_2330}] + + docking_score_cutoff: + label: Cutoff threshold for filtering docking scores + doc: |- + Cutoff threshold for filtering docking scores + Type: float + type: float + format: edam:format_2330 + inputBinding: + position: 3 + prefix: --docking_score_cutoff + + max_num_poses_per_ligand: + label: Maximum number of poses per initial ligand + doc: |- + Maximum number of poses per initial ligand + Type: int + type: int + format: edam:format_2330 + inputBinding: + position: 4 + prefix: --max_num_poses_per_ligand + default: -1 + + max_num_poses_total: + label: Maximum number of poses total + doc: |- + Maximum number of poses total + Type: int + type: int + format: edam:format_2330 + inputBinding: + position: 5 + prefix: --max_num_poses_total + default: -1 + + input_txt_path: + label: Experimental binding free energy data file (if any) + doc: |- + Experimental binding free energy data file (if any) + type: File? + format: edam:format_2330 + inputBinding: + position: 6 + prefix: --input_txt_path + + rescore: + label: Use True if autodock vina was run with --rescore + doc: |- + Use True if autodock vina was run with --rescore + type: boolean? + format: edam:format_2330 + inputBinding: + position: 7 + prefix: --rescore + + input_ligand_pdbqt_path: + label: Path to the input PDBQT ligands + doc: |- + Path to the input PDBQT ligands + Type: string + File type: input + Accepted formats: pdbqt + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/vina/vina_ligand.pdbqt + type: {"type": "array", "items": {"type": "array", "items": "File"}} + #type: File[][] # Invalid syntax; [] syntactic sugar only works for 1D arrays. + format: edam:format_1476 +# inputBinding: +# position: 7 # Since the type is File[], this means starting at this position. +# prefix: --input_ligand_pdbqt_path + +# NOTE: This is only used so we can create explicit edges. +# The scatter-related inference bugs are now sorted out, so this can probably be removed. + output_ligand_pdbqt_path: + label: Path to the output PDBQT ligand files + doc: |- + Path to the output PDBQT ligand files + Type: string + File type: output + Accepted formats: pdbqt + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.pdbqt + type: string + format: edam:format_1476 +# inputBinding: +# position: 7 +# prefix: --output_ligand_pdbqt_path + default: . + + input_receptor_pdbqt_path: + label: Path to the input PDBQT receptors + doc: |- + Path to the input PDBQT receptors + Type: string + File type: input + Accepted formats: pdbqt + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/data/vina/vina_ligand.pdbqt + type: {"type": "array", "items": {"type": "array", "items": "File"}} + #type: File[][] # Invalid syntax; [] syntactic sugar only works for 1D arrays. + format: edam:format_1476 + + output_receptor_pdbqt_path: + label: Path to the output PDBQT receptor files + doc: |- + Path to the output PDBQT receptor files + Type: string + File type: output + Accepted formats: pdbqt + Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.pdbqt + type: string + format: edam:format_1476 + default: . + +outputs: +# NOTE: If docking_score_cutoff is too negative and filters out all of the files, +# you will get the following runtime error message: +# ("Error collecting output for parameter 'output_ligand_pdbqt_path': cwl_adapters/autodock_vina_filter.cwl:73:3: 'NoneType' object does not support item assignment", {}) + output_ligand_pdbqt_path: + label: Path to the output PDBQT file + doc: |- + Path to the output PDBQT file +# While 2D array inputs seem to be working, all of my attempts to output a 2D array using +# outputEval have failed due to generating a stack trace in the cwltool python process: +# ("Error collecting output for parameter 'output_ligand_pdbqt_path': cwl_adapters/autodock_vina_filter.cwl:134:3: list indices must be integers or slices, not str", {}) +# I have even tried outputEval: $([[]]) and outputEval: $(inputs.output_ligand_pdbqt_path) +# Note that outputEval: $([]) and outputEval: $(inputs.output_ligand_pdbqt_path[0]) with type: File[] works. + #type: {"type": "array", "items": {"type": "array", "items": "File"}} + #type: File[][] # See above + type: File[] + outputBinding: + glob: indices.txt # This determines what binds to self[0] + loadContents: true # If true, this additionally binds self[0].contents + # NOTE: According to the CWL specification, loadContents only reads the first 64KB. + # In version 1.2, any more is an error; in versions 1.0 and 1.1, the file contents is silently truncated! + # Since log files and/or pdb files can easily exceed that, we do all + # processing using an external script, read in the final indices here, + # and index into the existing json. This preserves the additional json metadata fields, + # (location, basename, class, checksum, size, format, path) + # which would be difficult to recover from an external script which is only given path. + # (An external script can write a cwl.output.json file, which will alternatively determine the outputs.) + outputEval: | + ${ + var lines = self[0].contents.split("\n"); + var files = []; // In this case, flatten the 2D nested array into a 1D array + for (var i = 0; i < lines.length; i++) { + var indices = lines[i].split(" "); + //var docking_score = parseFloat(indices[0]); // See below + var mol_idx = parseInt(indices[1]); + var mode_idx = parseInt(indices[2]); + var file = inputs.input_ligand_pdbqt_path[mol_idx][mode_idx]; + files.push(file); + } + return files; + } + format: edam:format_1476 + + output_receptor_pdbqt_path: + label: Path to the output PDBQT file + doc: |- + Path to the output PDBQT file + type: File[] + outputBinding: + glob: indices.txt # This determines what binds to self[0] + loadContents: true # If true, this additionally binds self[0].contents + outputEval: | + ${ + var lines = self[0].contents.split("\n"); + var files = []; // In this case, flatten the 2D nested array into a 1D array + for (var i = 0; i < lines.length; i++) { + var indices = lines[i].split(" "); + //var docking_score = parseFloat(indices[0]); // See below + var mol_idx = parseInt(indices[1]); + var mode_idx = parseInt(indices[2]); + var file = inputs.input_receptor_pdbqt_path[mol_idx][mode_idx]; + files.push(file); + } + return files; + } + format: edam:format_1476 + + docking_scores: + label: Estimated Free Energies of Binding + doc: |- + Estimated Free Energies of Binding + type: + type: array + items: float + outputBinding: + glob: indices.txt + loadContents: true + outputEval: | + ${ + var lines = self[0].contents.split("\n"); + var docking_scores = []; + for (var i = 0; i < lines.length; i++) { + var indices = lines[i].split(" "); + var docking_score = parseFloat(indices[0]); + docking_scores.push(docking_score); + } + return docking_scores; + } + + experimental_binding_data: + label: Experimental binding data (if any) + doc: |- + Experimental binding data (if any) + type: ["null", {"type": "array", "items": "float"}] + outputBinding: + glob: indices.txt + loadContents: true + outputEval: | + ${ + var lines = self[0].contents.split("\n"); + var data = []; + for (var i = 0; i < lines.length; i++) { + var indices = lines[i].split(" "); + if (indices.length > 3) { + var datum = parseFloat(indices[3]); + data.push(datum); + } + } + if (data.length == 0) { + return null; + } else { + return data; + } + } + + experimental_dGs: + label: Experimental binding free energy dG values (if any) + doc: |- + Experimental binding free energy dG values (if any) + type: ["null", {"type": "array", "items": "float"}] + outputBinding: + glob: indices.txt + loadContents: true + outputEval: | + ${ + var lines = self[0].contents.split("\n"); + var dGs = []; + for (var i = 0; i < lines.length; i++) { + var indices = lines[i].split(" "); + if (indices.length > 4) { + var dG = parseFloat(indices[4]); + dGs.push(dG); + } + } + if (dGs.length == 0) { + return null; + } else { + return dGs; + } + } + +stdout: stdout + +$namespaces: + edam: https://edamontology.org/ + +$schemas: +- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl diff --git a/utils/autodock-vina-filter-plugin/build-docker.sh b/utils/autodock-vina-filter-plugin/build-docker.sh new file mode 100644 index 00000000..c34b2aea --- /dev/null +++ b/utils/autodock-vina-filter-plugin/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$(", "Brandon Walker "] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = ">=3.9,<3.13" +typer = "^0.7.0" +sophios = "0.1.4" + +[tool.poetry.group.dev.dependencies] +bump2version = "^1.0.1" +pytest = "^7.4" +pytest-sugar = "^0.9.6" +pre-commit = "^3.2.1" +black = "^23.3.0" +mypy = "^1.1.1" +ruff = "^0.0.270" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = [ + "." +] diff --git a/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__init__.py b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__init__.py new file mode 100644 index 00000000..d1234afb --- /dev/null +++ b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__init__.py @@ -0,0 +1,7 @@ +"""autodock_vina_filter.""" + +__version__ = "0.1.0" + +from polus.mm.utils.autodock_vina_filter.autodock_vina_filter import ( # noqa # pylint: disable=unused-import + autodock_vina_filter, +) diff --git a/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__main__.py b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__main__.py new file mode 100644 index 00000000..72230817 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/__main__.py @@ -0,0 +1,85 @@ +"""Package entrypoint for the autodock_vina_filter package.""" + +# Base packages +import logging +from os import environ +from pathlib import Path +from typing import List + +import typer +from polus.mm.utils.autodock_vina_filter.autodock_vina_filter import ( + autodock_vina_filter, +) + +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO")) +logger = logging.getLogger("polus.mm.utils.autodock_vina_filter.") +logger.setLevel(POLUS_LOG) + +app = typer.Typer(help="autodock_vina_filter.") + + + +@app.command() +def main( + input_log_path: str = typer.Option( + ..., + '--input_log_path', + help='Path to the log file, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log', + ), + input_log_paths: List[str] = typer.Option( + ..., + '--input_log_paths', + help='Path to the log files, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log', + ), + docking_score_cutoff: float = typer.Option( + ..., + '--docking_score_cutoff', + help='Cutoff threshold for filtering docking scores, Type: float', + ), + max_num_poses_per_ligand: int = typer.Option( + ..., + '--max_num_poses_per_ligand', + help='Maximum number of poses per initial ligand, Type: int', + ), + max_num_poses_total: int = typer.Option( + ..., + '--max_num_poses_total', + help='Maximum number of poses total, Type: int', + ), + input_txt_path: str = typer.Option( + ..., + '--input_txt_path', + help='Experimental binding free energy data file (if any)', + ), + rescore: bool = typer.Option( + ..., + '--rescore', + help='Use True if autodock vina was run with --rescore', + ), + +) -> None: + """autodock_vina_filter.""" + logger.info(f"input_log_path: {input_log_path}") + logger.info(f"input_log_paths: {input_log_paths}") + logger.info(f"docking_score_cutoff: {docking_score_cutoff}") + logger.info(f"max_num_poses_per_ligand: {max_num_poses_per_ligand}") + logger.info(f"max_num_poses_total: {max_num_poses_total}") + logger.info(f"input_txt_path: {input_txt_path}") + logger.info(f"rescore: {rescore}") + + autodock_vina_filter( + input_log_path=input_log_path, + input_log_paths=input_log_paths, + docking_score_cutoff=docking_score_cutoff, + max_num_poses_per_ligand=max_num_poses_per_ligand, + max_num_poses_total=max_num_poses_total, + input_txt_path=input_txt_path, + rescore=rescore) + + +if __name__ == '__main__': + app() \ No newline at end of file diff --git a/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/autodock_vina_filter.py b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/autodock_vina_filter.py new file mode 100644 index 00000000..25881c02 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/src/polus/mm/utils/autodock_vina_filter/autodock_vina_filter.py @@ -0,0 +1,159 @@ +import sys +from typing import List, Tuple +from pathlib import Path + +def autodock_vina_filter(input_log_path : str, input_log_paths : list[str], docking_score_cutoff : float, max_num_poses_per_ligand : int, + max_num_poses_total : int, input_txt_path : str, rescore : bool): + '''autodock_vina_filter. + + Args: + script: + input_log_path: Path to the log file, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log + input_log_paths: Path to the log files, Type: string, File type: output, Accepted formats: log, Example file: https://github.com/bioexcel/biobb_vs/raw/master/biobb_vs/test/reference/vina/ref_output_vina.log + docking_score_cutoff: Cutoff threshold for filtering docking scores, Type: float + max_num_poses_per_ligand: Maximum number of poses per initial ligand, Type: int + max_num_poses_total: Maximum number of poses total, Type: int + input_txt_path: Experimental binding free energy data file (if any) + rescore: Use True if autodock vina was run with --rescore + Returns: + None + ''' + if max_num_poses_per_ligand == -1: + max_num_poses_per_ligand = sys.maxsize + if max_num_poses_total == -1: + max_num_poses_total = sys.maxsize + + if input_log_path: + with open(input_log_path, mode='r', encoding='utf-8') as f: + lines = f.readlines() + + if input_log_paths: + lines_all = [] + for path in input_log_paths: + with open(path, mode='r', encoding='utf-8') as f: + lines = f.readlines() + lines_all.extend(lines) + lines = lines_all + + for line in lines: + print(line) + + # After the initial headers, an autodock vina log file consists of + # 1 or more blocks of the following form: + # Performing docking (random seed: -193947260) ... + # 0% 10 20 30 40 50 60 70 80 90 100% + # |----|----|----|----|----|----|----|----|----|----| + # *************************************************** + + # mode | affinity | dist from best mode + # | (kcal/mol) | rmsd l.b.| rmsd u.b. + # -----+------------+----------+---------- + # 1 -5.773 0 0 + # 2 -5.577 4.821 7.207 + # 3 -5.46 4.053 6.446 + # 4 -5.44 1.344 3.349 + # 5 -5.431 3.576 5.58 + # 6 -5.412 3.012 5.868 + # 7 -5.405 1.914 4.145 + # 8 -5.403 3.209 6.271 + # 9 -5.392 2.556 4.714 + # If rescoring, the relevant line from each log file is of the form: + # Estimated Free Energy of Binding : -8.659 (kcal/mol) [=(1)+(2)+(3)-(4)] + + scores: List[float] = [] + scores_all: List[List[float]] = [] + if rescore: + for line in lines: + if line.startswith('Estimated Free Energy of Binding'): + scores_all.append([float(line.split()[6])]) + else: + parsing = False + for line in lines: + if line.startswith('-----+------------+----------+----------'): + scores = [] + parsing = True + continue + + if parsing: + try: + strs = line.split() + mode_idx = int(strs[0]) + floats = [float(x) for x in strs[1:]] + score = floats[0] + scores.append(score) + except Exception as e: # pylint: disable=broad-exception-caught + scores_all.append(scores) + parsing = False + + if parsing: # When we reach end of file, we need to append one last time. + scores_all.append(scores) + + if input_txt_path: # If we have experimental data + with open(input_txt_path, mode='r', encoding='utf-8') as f: + lines_exp = f.readlines() + if len(lines_exp) != len(scores_all): + print('Error! len(lines_exp), len(scores_all)', len(lines_exp), len(scores_all)) + sys.exit(1) + + # Now we need to globally sort (by the docking score) and apply the max total, while preserving the 2D structure. + indexed_scores_exp = [] # : List[Tuple[float, Tuple[int, int], float, float]] + for mol_idx, scores in enumerate(scores_all): + split = lines_exp[mol_idx].split() # NOTE: Important. + # This is how we duplicate the experimental data for each binding mode + # to workaround the fact that for CWL scatter dotproduct, the arrays + # must be the same length. This is why all 'extra' data must be + # 'factored through' each filtering operation. :( + smiles = split[0] + datum = float(split[1]) + temp_exp = [] # type: ignore #: List[Tuple[float, Tuple[int, int], float, float]] + for mode_idx, score in enumerate(scores): + if score < docking_score_cutoff and len(temp_exp) < max_num_poses_per_ligand: + if len(split) > 2: + dG = float(split[2]) + temp_exp.append((score, (mol_idx, mode_idx), datum, dG)) + else: + temp_exp.append((score, (mol_idx, mode_idx), datum)) # type: ignore + for t1 in temp_exp: + indexed_scores_exp.append(t1) + + indexed_scores_exp.sort(key=lambda x: x[0]) # Sort by the docking scores + indexed_scores_exp = indexed_scores_exp[:max_num_poses_total] # Truncate upto max total + # indexed_scores_exp.sort(key=lambda x: x[1]) # Sort by the index tuple, which uses dictionary order. + + indices_all = [] + for (score, (mol_idx, mode_idx), datum, dG) in indexed_scores_exp: + indices_all.append(str(score) + " " + str(mol_idx) + " " + str(mode_idx) + " " + str(datum) + " " + str(dG)) + + with open('indices.txt', mode='w', encoding='utf-8') as f: + f.write('\n'.join(indices_all)) + else: + # Now we need to globally sort (by the docking score) and apply the max total, while preserving the 2D structure. + indexed_scores: List[Tuple[float, Tuple[int, int]]] = [] + for mol_idx, scores in enumerate(scores_all): + temp: List[Tuple[float, Tuple[int, int]]] = [] + for mode_idx, score in enumerate(scores): + if score < docking_score_cutoff and len(temp) < max_num_poses_per_ligand: + temp.append((score, (mol_idx, mode_idx))) + for t2 in temp: + indexed_scores.append(t2) + + indexed_scores.sort(key=lambda x: x[0]) # Sort by the docking scores + indexed_scores = indexed_scores[:max_num_poses_total] # Truncate upto max total + # indexed_scores.sort(key=lambda x: x[1]) # Sort by the index tuple, which uses dictionary order. + + indices_all = [] + for (score, (mol_idx, mode_idx)) in indexed_scores: + indices_all.append(str(score) + " " + str(mol_idx) + " " + str(mode_idx)) + + with open('indices.txt', mode='w', encoding='utf-8') as f: + f.write('\n'.join(indices_all)) + + # TODO: Since outputEval doesn't seem to work for outputting 2D arrays, + # Try to specify the outputs by writing cwl.output.json + # See https://cwl.discourse.group/t/cwl-output-json/579 + # outputs: Dict[str, Any] = {} + # outputs['output_batch_pdbqt_path'] = [[]] + + # with open('cwl.output.json', mode='w', encoding='utf-8') as f: + # f.write(json.dumps(outputs)) + diff --git a/utils/autodock-vina-filter-plugin/tests/1e3g.pdbqt b/utils/autodock-vina-filter-plugin/tests/1e3g.pdbqt new file mode 100644 index 00000000..d0a98b22 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/1e3g.pdbqt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24bf6ec9386d191d5c680bf100af549c86957d6f6229533a2b4dbad7860468c5 +size 4119 diff --git a/utils/autodock-vina-filter-plugin/tests/1e3g_protein.pdb b/utils/autodock-vina-filter-plugin/tests/1e3g_protein.pdb new file mode 100644 index 00000000..291a88ab --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/1e3g_protein.pdb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4537cf78a23a487d8892f566fab24d1fd7c36e2813324378ffaf790aabc7e01 +size 324170 diff --git a/utils/autodock-vina-filter-plugin/tests/__init__.py b/utils/autodock-vina-filter-plugin/tests/__init__.py new file mode 100644 index 00000000..3627a081 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for autodock_vina_filter.""" diff --git a/utils/autodock-vina-filter-plugin/tests/binding_data.txt b/utils/autodock-vina-filter-plugin/tests/binding_data.txt new file mode 100644 index 00000000..c90cc8a7 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/binding_data.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2ef33f53e6feb02649c59d6a86c30df613ffe6d50ba8ac8705aaad6e20eae8 +size 50 diff --git a/utils/autodock-vina-filter-plugin/tests/test_autodock_vina_filter.py b/utils/autodock-vina-filter-plugin/tests/test_autodock_vina_filter.py new file mode 100644 index 00000000..fdc5e10c --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/test_autodock_vina_filter.py @@ -0,0 +1,79 @@ +"""Tests for autodock_vina_filter.""" +import sys +from pathlib import Path + +from polus.mm.utils.autodock_vina_filter.autodock_vina_filter import ( + autodock_vina_filter, +) + +current_dir = Path(__file__).resolve().parent +target_dir = current_dir.parent.parent.parent / "cwl_utils" +sys.path.append(str(target_dir)) + +from cwl_utilities import call_cwltool # noqa: E402 +from cwl_utilities import create_input_yaml # noqa: E402 +from cwl_utilities import parse_cwl_arguments # noqa: E402 + + +def test_autodock_vina_filter() -> None: + """Test autodock_vina_filter.""" + input_log_paths = None + input_log_path = "vina.log" + input_txt_path = "binding_data.txt" + docking_score_cutoff = -1.0 + max_num_poses_per_ligand = 1 + max_num_poses_total = 1 + rescore = True + input_log_path = str(Path(__file__).resolve().parent / Path(input_log_path)) + input_txt_path = str(Path(__file__).resolve().parent / Path(input_txt_path)) + + autodock_vina_filter( + input_log_path, + input_log_paths, + docking_score_cutoff, + max_num_poses_per_ligand, + max_num_poses_total, + input_txt_path, + rescore, + ) + assert Path("indices.txt").exists() + + +def test_cwl_autodock_vina_filter() -> None: + """Test autodock_vina_filter in cwltool.""" + cwl_file = Path("autodock_vina_filter_0@1@0.cwl") + input_to_props = parse_cwl_arguments(cwl_file) + + input_log_path = "vina.log" + input_log_path = str(Path(__file__).resolve().parent / Path(input_log_path)) + input_to_props["input_log_path"]["path"] = [input_log_path] + input_to_props["input_log_path"]["class"] = "File" + + input_txt_path = "binding_data.txt" + input_txt_path = str(Path(__file__).resolve().parent / Path(input_txt_path)) + input_to_props["input_txt_path"]["path"] = input_txt_path + input_to_props["input_txt_path"]["class"] = "File" + + input_ligand_pdbqt_path = "1e3g.pdqt" + input_ligand_pdbqt_path = str( + Path(__file__).resolve().parent / Path(input_ligand_pdbqt_path), + ) + input_to_props["input_ligand_pdbqt_path"] = f"['{input_ligand_pdbqt_path}']" + + input_receptor_pdbqt_path = "1e3g_protein.pdb" + input_receptor_pdbqt_path = str( + Path(__file__).resolve().parent / Path(input_receptor_pdbqt_path), + ) + input_to_props["input_receptor_pdbqt_path"] = f"['{input_receptor_pdbqt_path}']" + + input_to_props["docking_score_cutoff"] = -1.0 + input_to_props["max_num_poses_per_ligand"] = 1 + input_to_props["max_num_poses_total"] = 1 + input_to_props["rescore"] = True + + del input_to_props["input_log_paths"] + input_yaml_path = Path("autodock_vina_filter.yml") + create_input_yaml(input_to_props, input_yaml_path) + call_cwltool(cwl_file, input_yaml_path) + + assert Path("indices.txt").exists() diff --git a/utils/autodock-vina-filter-plugin/tests/vina.log b/utils/autodock-vina-filter-plugin/tests/vina.log new file mode 100644 index 00000000..a6c937d9 --- /dev/null +++ b/utils/autodock-vina-filter-plugin/tests/vina.log @@ -0,0 +1,40 @@ +AutoDock Vina v1.2.5-8-g8604f73 +################################################################# +# If you used AutoDock Vina in your work, please cite: # +# # +# J. Eberhardt, D. Santos-Martins, A. F. Tillack, and S. Forli # +# AutoDock Vina 1.2.0: New Docking Methods, Expanded Force # +# Field, and Python Bindings, J. Chem. Inf. Model. (2021) # +# DOI 10.1021/acs.jcim.1c00203 # +# # +# O. Trott, A. J. Olson, # +# AutoDock Vina: improving the speed and accuracy of docking # +# with a new scoring function, efficient optimization and # +# multithreading, J. Comp. Chem. (2010) # +# DOI 10.1002/jcc.21334 # +# # +# Please see https://github.com/ccsb-scripps/AutoDock-Vina for # +# more information. # +################################################################# + +Scoring function : vina +Rigid receptor: /var/lib/cwl/stga009b620-fa10-45b3-bb50-caf1e722f37d/system.pdbqt +Ligand: /PESXAc/system.pdbqt +Grid center: ligand center (autobox) +Grid size : ligand size + 4 A in each dimension (autobox) +Grid space : 0.375 +Exhaustiveness: 8 +CPU: 0 +Verbosity: 1 + +Computing Vina grid ... done. +Estimated Free Energy of Binding : -10.522 (kcal/mol) [=(1)+(2)+(3)-(4)] +(1) Final Intermolecular Energy : -11.445 (kcal/mol) + Ligand - Receptor : -11.445 (kcal/mol) + Ligand - Flex side chains : 0.000 (kcal/mol) +(2) Final Total Internal Energy : 0.000 (kcal/mol) + Ligand : 0.000 (kcal/mol) + Flex - Receptor : 0.000 (kcal/mol) + Flex - Flex side chains : 0.000 (kcal/mol) +(3) Torsional Free Energy : 0.923 (kcal/mol) +(4) Unbound System's Energy : 0.000 (kcal/mol)