Skip to content

Commit

Permalink
pdb download (#12)
Browse files Browse the repository at this point in the history
Co-authored-by: Brandon Duane Walker <[email protected]>
  • Loading branch information
2 people authored and Brandon Duane Walker committed Jun 12, 2024
1 parent 07e3016 commit c4842b6
Show file tree
Hide file tree
Showing 17 changed files with 104 additions and 18 deletions.
4 changes: 0 additions & 4 deletions utils/extract-ligand-protein-plugin/build-docker.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# docker build -f Dockerfile -t polusai/extract-ligand-protein-tool .
FROM condaforge/mambaforge

ENV EXEC_DIR="/opt/executables"
Expand All @@ -15,11 +16,6 @@ COPY CHANGELOG.md ${EXEC_DIR}

# Install needed packages here

RUN pip install filepattern
RUN conda config --add channels conda-forge
RUN conda install mdanalysis


COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the extract-ligand-protein tool image, tagged with the version string
# stored in the VERSION file.
#
# Both VERSION and the Docker build context (".") are resolved relative to the
# current working directory, so run this script from the tool's directory.

# Stop on the first error, on use of an unset variable, and on a failed
# pipeline stage, so a missing VERSION file aborts the script instead of
# continuing to a build with an empty tag.
set -euo pipefail

version=$(<VERSION)

# An empty VERSION file would produce the invalid image reference "name:".
if [[ -z "${version}" ]]; then
    echo "VERSION file is empty; cannot tag the image." >&2
    exit 1
fi

# Quote the tag so stray whitespace in VERSION cannot split the argument.
docker build . -t "polusai/extract-ligand-protein-tool:${version}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env cwl-runner
# CWL description of the extract_ligand_protein command-line tool: it runs the
# polus.mm.utils.extract_ligand_protein Python module inside the pinned Docker
# image, passing one input PDB file and the names of the two files to write.
cwlVersion: v1.0

class: CommandLineTool

# NOTE(review): the Python changes in the same commit reference MDAnalysis;
# confirm whether "OpenMM" in the label/doc below is still accurate.
label: A tool that employs OpenMM to extract ligands and protein from a PDB file

doc: |-
  A tool that employs OpenMM to extract ligands and protein from a PDB file

baseCommand: ["python", "-m", "polus.mm.utils.extract_ligand_protein"]

hints:
  DockerRequirement:
    # Pinned by digest rather than by tag.
    dockerPull: polusai/extract-ligand-protein-tool@sha256:38416f3d020c26869028c6ba66a243882d0b3c5885c79ff68355912ec5768fc1

inputs:
  input_pdb_path:
    label: Input pdb file path
    doc: |-
      Input pdb file path
      Type: string
      File type: input
      Accepted formats: pdb
      Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb
    type: File
    format:
    - edam:format_1476
    inputBinding:
      prefix: --input_pdb_path

  # Name of the protein file to create, passed as a plain string; the
  # `outputs` entry of the same name globs for this value.
  # NOTE(review): the CWL spec defines `format` only for File-typed
  # parameters; presumably it is kept on these string inputs for downstream
  # tooling -- confirm before removing.
  output_pdb_path:
    label: Output pdb file path
    doc: |-
      Output pdb file path
      Type: string
      File type: output
      Accepted formats: pdb
      Example file: https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb
    type: string
    format:
    - edam:format_1476
    inputBinding:
      prefix: --output_pdb_path
    default: system.pdb

  # Name of the ligand file to create, passed as a plain string; the
  # `outputs` entry of the same name globs for this value.
  output_pdb_ligand_path:
    label: Output pdb ligand file path
    doc: |-
      Output pdb ligand file path
      Type: string
      File type: output
      Accepted formats: pdb
    type: string
    format:
    - edam:format_1476
    inputBinding:
      prefix: --output_pdb_ligand_path
    default: ligand_system.pdb

outputs:
  output_pdb_path:
    label: Output pdb file path
    doc: |-
      Output pdb file path
    type: File
    outputBinding:
      glob: $(inputs.output_pdb_path)
    format: edam:format_1476

  output_pdb_ligand_path:
    label: Output ligand pdb file path
    doc: |-
      Output ligand pdb file path
      Use optional File? since ligand may not exist in complex
    type: File?
    outputBinding:
      glob: $(inputs.output_pdb_ligand_path)
    format: edam:format_1476

$namespaces:
  edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
specVersion: "0.1.0"
name: extract_ligand_protein
version: 0.1.0
container: extract-ligand-protein-plugin
container: extract-ligand-protein-tool
entrypoint:
title: extract_ligand_protein
description: A tool that employs OpenMM to extract ligands and protein from a PDB file
author: Data Scientist
contact: [email protected]
author: Brandon Walker
contact: [email protected]
repository:
documentation:
citation:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ def extract_single_ligand_protein( # noqa: PLR0912
except MDAnalysis.exceptions.NoDataError:
print("No bonds found in the PDB file.") # noqa: T201

# Identify water molecules based on the
# connectivity pattern (Oxygen bonded to two Hydrogens)
# Identify water molecules based on the connectivity
# pattern (Oxygen bonded to two Hydrogens)
if has_bonds:
water_indices = set()
for atom in dup_u.atoms: # dont use selection resname == 'HOH',
# pdb file may have different water residue names
h_bonds = 2
num_bonds = 2
if (
atom.name == "O" and len(atom.bonds) == h_bonds
atom.name == "O" and len(atom.bonds) == num_bonds
): # if hydrogens are added
bonded_atoms_names = {a.name for a in atom.bonded_atoms}
if bonded_atoms_names == {"H"}: # Check if both bonds are Hydrogens
Expand Down Expand Up @@ -90,9 +90,14 @@ def extract_single_ligand_protein( # noqa: PLR0912
) # needed for coordinates
ligand_u.atoms = ligand_atoms

protein_u.atoms.write(str(output_pdb_path))
with output_pdb_path.open(mode="w", encoding="utf-8") as output_file:
protein_u.atoms.write(output_file)
if len(ligand_u.atoms) > 0: # will crash if no ligand atoms
ligand_u.atoms.write(str(output_pdb_ligand_path))
with output_pdb_ligand_path.open(
mode="w",
encoding="utf-8",
) as output_ligand_file:
ligand_u.atoms.write(output_ligand_file)


def extract_all_ligand_protein(input_pdb_path: list[Path], outdir: Path) -> None:
Expand Down

0 comments on commit c4842b6

Please sign in to comment.