-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
92d2d65
commit 67e93ea
Showing
17 changed files
with
1,233 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
|
||
# Work directory defined in the base container | ||
# WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
|
||
# Install needed packages here | ||
COPY src ${EXEC_DIR}/src | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir | ||
|
||
CMD ["--help"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# extract_pdbids_drugbank (0.1.0) | ||
|
||
Filter the Drugbank database | ||
|
||
## Options | ||
|
||
This plugin takes 5 input arguments and 4 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File | | ||
| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi | List of input InChI strings, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi_keys | List of input InChI keys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string | | ||
| output_txt_path | Path to the txt file | Output | File | File | | ||
| output_smiles | The Smiles of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | | ||
| output_pdbids_1D | The PDB IDs of target structures in 1D array | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | | ||
| output_pdbids_2D | The PDB IDs of target structures in 2D array | Output | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
version=$(<VERSION) | ||
docker build . -t polusai/extract-pdbids-drugbank-tool:${version} |
185 changes: 185 additions & 0 deletions
185
utils/extract-pdbids-drugbank-plugin/extract_pdbids_drugbank_0@[email protected]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
#!/usr/bin/env cwl-runner | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: Filter the Drugbank database | ||
|
||
doc: |- | ||
Filter the Drugbank database | ||
|
||
baseCommand: ["python", "-m", "polus.mm.utils.extract_pdbids_drugbank"] | ||
|
||
hints: | ||
DockerRequirement: | ||
dockerPull: polusai/extract-pdbids-drugbank-tool@sha256:60097d534aef1ced5e08bc896d541a5364d8452a16d883b845e29552a99027b4 | ||
|
||
requirements: | ||
InlineJavascriptRequirement: {} | ||
# Enabling InitialWorkDirRequirement will stage the input Drugbank xml file | ||
InitialWorkDirRequirement: | ||
listing: | ||
- $(inputs.drugbank_xml_file_path) | ||
|
||
inputs: | ||
drugbank_xml_file_path: | ||
label: Path to the Drugbank xml file | ||
doc: |- | ||
Path to the Drugbank xml file | ||
type: File | ||
format: edam:format_2332 | ||
inputBinding: | ||
prefix: --drugbank_xml_file_path | ||
|
||
smiles: | ||
label: List of input SMILES # type: | ||
doc: |- | ||
List of input SMILES | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: edam:format_2330 | ||
inputBinding: | ||
prefix: --smiles | ||
default: [] | ||
|
||
inchi: | ||
label: List of input SMILES # type: | ||
doc: |- | ||
List of input SMILES | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi | ||
default: [] | ||
|
||
inchi_keys: | ||
label: List of input SMILES # type: | ||
doc: |- | ||
List of input SMILES | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi_keys | ||
default: [] | ||
|
||
output_txt_path: | ||
label: Path to the text dataset file | ||
doc: |- | ||
Path to the text dataset file | ||
Type: string | ||
File type: output | ||
Accepted formats: txt | ||
type: string | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --output_txt_path | ||
default: system.log | ||
|
||
outputs: | ||
output_txt_path: | ||
label: Path to the txt file | ||
doc: |- | ||
Path to the txt file | ||
type: File | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
format: edam:format_2330 | ||
|
||
output_smiles: | ||
label: The Smiles of small molecules | ||
doc: |- | ||
The Smiles of small molecules | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var smiles = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation. We need to duplicate it, so each SMILES string | ||
// corresponds to a PDB ID in the PDB IDs array. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
smiles.push(words[0]); | ||
} | ||
} | ||
return smiles; | ||
} | ||
|
||
output_pdbids_1D: | ||
label: The PDB IDs of target structures in 1D array | ||
doc: |- | ||
The PDB IDs of target structures in 1D array | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var pdbids = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation and the rest are the target structure PDB IDs. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
pdbids.push(words[j]); | ||
} | ||
} | ||
return pdbids; | ||
} | ||
|
||
output_pdbids_2D: | ||
label: The PDB IDs of target structures in 2D array | ||
doc: |- | ||
The PDB IDs of target structures in 2D array | ||
type: {"type": "array", "items": {"type": "array", "items": "string"}} | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var pdbids_2d = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation and the rest are the target structure PDB IDs. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
var pdbids = []; | ||
for (var j = 1; j < words.length; j++) { | ||
pdbids.push(words[j]); | ||
} | ||
pdbids_2d.push(pdbids); | ||
} | ||
return pdbids_2d; | ||
} | ||
|
||
$namespaces: | ||
edam: https://edamontology.org/ | ||
|
||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
specVersion: "0.1.0" | ||
name: extract_pdbids_drugbank | ||
version: 0.1.0 | ||
container: extract-pdbids-drugbank-plugin | ||
entrypoint: | ||
title: extract_pdbids_drugbank | ||
description: Filter the Drugbank database | ||
author: Brandon Walker, Nazanin Donyapour | ||
contact: [email protected], [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: drugbank_xml_file_path | ||
required: true | ||
description: Path to the Drugbank xml file | ||
type: File | ||
defaultValue: {'class': 'File', 'location': '../../../fda_drug_dataset/drugbank/drugbank_5.1.10.xml'} | ||
format: | ||
uri: edam:format_2332 | ||
- name: smiles | ||
required: true | ||
description: List of input SMILES, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi | ||
required: true | ||
description: List of input SMILES, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi_keys | ||
required: true | ||
description: List of input SMILES, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the text dataset file, Type string, File type output, Accepted formats txt | ||
type: string | ||
defaultValue: system.log | ||
format: | ||
uri: edam:format_2330 | ||
outputs: | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the txt file | ||
type: File | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_smiles | ||
required: true | ||
description: The Smiles of small molecules | ||
type: {'type': 'array', 'items': 'string'} | ||
- name: output_pdbids_1D | ||
required: true | ||
description: The PDB IDs of target structures in 1D array | ||
type: {'type': 'array', 'items': 'string'} | ||
- name: output_pdbids_2D | ||
required: true | ||
description: The PDB IDs of target structures in 2D array | ||
type: {'type': 'array', 'items': {'type': 'array', 'items': 'string'}} | ||
ui: | ||
- key: inputs.drugbank_xml_file_path | ||
title: "drugbank_xml_file_path: " | ||
description: "Path to the Drugbank xml file" | ||
type: File | ||
- key: inputs.smiles | ||
title: "smiles: " | ||
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi | ||
title: "inchi: " | ||
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi_keys | ||
title: "inchi_keys: " | ||
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.output_txt_path | ||
title: "output_txt_path: " | ||
description: "Path to the text dataset file, Type string, File type output, Accepted formats txt" | ||
type: string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[tool.poetry] | ||
name = "polus-mm-utils-extract-pdbids-drugbank" | ||
version = "0.1.0" | ||
description = "Filter the Drugbank database" | ||
authors = ["Nazanin Donyapour <[email protected]>", "Brandon Walker <[email protected]>"] | ||
readme = "README.md" | ||
packages = [{include = "polus", from = "src"}] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.9,<3.13" | ||
typer = "^0.7.0" | ||
sophios = "0.1.4" | ||
pandas = "2.2.2" | ||
rdkit = "2024.3.5" | ||
defusedxml = "0.7.1" | ||
|
||
[tool.poetry.group.dev.dependencies] | ||
bump2version = "^1.0.1" | ||
pytest = "^7.4" | ||
pytest-sugar = "^0.9.6" | ||
pre-commit = "^3.2.1" | ||
black = "^23.3.0" | ||
mypy = "^1.1.1" | ||
ruff = "^0.0.270" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
pythonpath = [ | ||
"." | ||
] |
Oops, something went wrong.