-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
23fb84d
commit 81edf6f
Showing
19 changed files
with
1,202 additions
and
0 deletions.
There are no files selected for viewing
29 changes: 29 additions & 0 deletions
29
utils/extract-pdbids-drugbank-xsdata-plugin/.bumpversion.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
FROM condaforge/mambaforge | ||
|
||
ENV EXEC_DIR="/opt/executables" | ||
ENV POLUS_LOG="INFO" | ||
RUN mkdir -p ${EXEC_DIR} | ||
|
||
|
||
# Work directory defined in the base container | ||
# WORKDIR ${EXEC_DIR} | ||
|
||
COPY pyproject.toml ${EXEC_DIR} | ||
COPY VERSION ${EXEC_DIR} | ||
COPY README.md ${EXEC_DIR} | ||
COPY CHANGELOG.md ${EXEC_DIR} | ||
|
||
# Install needed packages here | ||
# errors installing rdkit from poetry so using conda | ||
COPY environment.yml ${EXEC_DIR} | ||
RUN mamba env create -f ${EXEC_DIR}/environment.yml | ||
RUN echo "source activate project_env" > ~/.bashrc | ||
# environment.yml names the environment "project_env"; put that environment's | ||
# bin directory on PATH so the later pip3 install targets it instead of base. | ||
ENV PATH=/opt/conda/envs/project_env/bin:$PATH | ||
|
||
COPY src ${EXEC_DIR}/src | ||
|
||
RUN pip3 install ${EXEC_DIR} --no-cache-dir | ||
|
||
CMD ["--help"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# extract_pdbids_drugbank_xsdata (0.1.0) | ||
|
||
Filter Drugbank database using xsData | ||
|
||
## Options | ||
|
||
This plugin takes 5 input arguments and 3 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File | | ||
| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi | List of input InChI strings, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| inchi_keys | List of input InChIKeys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] | | ||
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string | | ||
| output_txt_path | Path to the txt file | Output | File | File | | ||
| output_smiles | The Smiles of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | | ||
| output_pdb_ids | The PDB IDs of target structures | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
version=$(<VERSION) | ||
docker build . -t polusai/extract-pdbids-drugbank-xsdata-plugin:${version} |
14 changes: 14 additions & 0 deletions
14
utils/extract-pdbids-drugbank-xsdata-plugin/environment.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
name: project_env | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- python==3.10 | ||
- rdkit==2024.03.1 | ||
- pytest==8.1.1 | ||
- cwltool==3.1.20240404144621 | ||
- cwl-utils==0.33 | ||
- pip | ||
- pip: | ||
- xsdata-pydantic[cli,lxml,soap] | ||
- --extra-index-url=https://test.pypi.org/simple | ||
- drugbank-schemas |
174 changes: 174 additions & 0 deletions
174
utils/extract-pdbids-drugbank-xsdata-plugin/extract_pdbids_drugbank_xsdata.cwl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
#!/usr/bin/env cwl-runner | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: Filter Drugbank database using xsData | ||
|
||
doc: |- | ||
Filter Drugbank database using xsData | ||
|
||
baseCommand: ["conda", "run", "-n", "project_env", "python", "-m", "polus.mm.utils.extract_pdbids_drugbank_xsdata"] | ||
|
||
hints: | ||
DockerRequirement: | ||
dockerPull: ndonyapour/extract_pdbids_drugbank_xsdata | ||
|
||
requirements: | ||
InlineJavascriptRequirement: {} | ||
# Enabling InitialWorkDirRequirement will stage the input Drugbank xml file | ||
InitialWorkDirRequirement: | ||
listing: | ||
- $(inputs.drugbank_xml_file_path) | ||
- $(inputs.drugbank_xsd_file_path) | ||
|
||
inputs: | ||
drugbank_xml_file_path: | ||
label: Path to the Drugbank xml file | ||
doc: |- | ||
Path to the Drugbank xml file | ||
type: File | ||
format: edam:format_2332 | ||
inputBinding: | ||
prefix: --drugbank_xml_file_path | ||
default: | ||
class: File | ||
location: ../../../drugbank/drugbank_5.1.10.xml | ||
|
||
drugbank_xsd_file_path: | ||
label: Path to the Drugbank schema XSD file | ||
doc: |- | ||
Path to the Drugbank schema XSD file | ||
type: File | ||
format: edam:format_3804 | ||
inputBinding: | ||
prefix: --drugbank_xsd_file_path | ||
default: | ||
class: File | ||
location: ../../drugbank/drugbank_5.1.10.xsd | ||
|
||
smiles: | ||
label: List of input SMILES # type: | ||
doc: |- | ||
List of input SMILES | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: edam:format_2330 | ||
inputBinding: | ||
prefix: --smiles | ||
default: [] | ||
|
||
# Optional list of InChI strings, passed to the tool with --inchi. | ||
inchi: | ||
label: List of input InChI strings # type: | ||
doc: |- | ||
List of input InChI strings | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi | ||
default: [] | ||
|
||
# Optional list of InChIKeys, passed to the tool with --inchi_keys. | ||
inchi_keys: | ||
label: List of input InChIKeys # type: | ||
doc: |- | ||
List of input InChIKeys | ||
Type: string[] | ||
File type: input | ||
Accepted formats: list[string] | ||
type: ["null", {"type": "array", "items": "string"}] | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --inchi_keys | ||
default: [] | ||
|
||
output_txt_path: | ||
label: Path to the text dataset file | ||
doc: |- | ||
Path to the text dataset file | ||
Type: string | ||
File type: output | ||
Accepted formats: txt | ||
type: string | ||
format: | ||
- edam:format_2330 | ||
inputBinding: | ||
prefix: --output_txt_path | ||
default: system.log | ||
|
||
outputs: | ||
output_txt_path: | ||
label: Path to the txt file | ||
doc: |- | ||
Path to the txt file | ||
type: File | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
format: edam:format_2330 | ||
|
||
output_smiles: | ||
label: The Smiles of small molecules | ||
doc: |- | ||
The Smiles of small molecules | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var smiles = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation. We need to duplicate it, so each SMILES string | ||
// corresponds to a PDB ID in the PDB IDs array. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
smiles.push(words[0]); | ||
} | ||
} | ||
return smiles; | ||
} | ||
|
||
output_pdb_ids: | ||
label: The PDB IDs of target structures | ||
doc: |- | ||
The PDB IDs of target structures | ||
type: | ||
type: array | ||
items: string | ||
outputBinding: | ||
glob: $(inputs.output_txt_path) | ||
loadContents: true | ||
outputEval: | | ||
${ | ||
var lines = self[0].contents.split("\n"); | ||
// remove blank lines | ||
lines = lines.filter(function(line) {return line.trim() !== '';}); | ||
var pdbids = []; | ||
for (var i = 0; i < lines.length; i++) { | ||
// The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7 | ||
// The first item is the SMILES notation and the rest are the target structure PDB IDs. | ||
var words = lines[i].split(",").map(function(item) {return item.trim();}); | ||
for (var j = 1; j < words.length; j++) { | ||
pdbids.push(words[j]); | ||
} | ||
} | ||
return pdbids; | ||
} | ||
|
||
$namespaces: | ||
edam: https://edamontology.org/ | ||
|
||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
specVersion: "0.1.0" | ||
name: extract_pdbids_drugbank_xsdata | ||
version: 0.1.0 | ||
container: extract-pdbids-drugbank-xsdata-plugin | ||
entrypoint: | ||
title: extract_pdbids_drugbank_xsdata | ||
description: Filter Drugbank database using xsData | ||
author: Brandon Walker, Nazanin Donyapour | ||
contact: [email protected], [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: drugbank_xml_file_path | ||
required: true | ||
description: Path to the Drugbank xml file | ||
type: File | ||
defaultValue: {'class': 'File', 'location': '../../../drugbank/drugbank_5.1.10.xml'} | ||
format: | ||
uri: edam:format_2332 | ||
- name: drugbank_xsd_file_path | ||
required: true | ||
description: Path to the Drugbank schema XSD file | ||
type: File | ||
defaultValue: {'class': 'File', 'location': '../../drugbank/drugbank_5.1.10.xsd'} | ||
format: | ||
uri: edam:format_3804 | ||
- name: smiles | ||
required: true | ||
description: List of input SMILES, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi | ||
required: true | ||
description: List of input InChI strings, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: inchi_keys | ||
required: true | ||
description: List of input InChIKeys, Type string[], File type input, Accepted formats list[string] | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the text dataset file, Type string, File type output, Accepted formats txt | ||
type: string | ||
defaultValue: system.log | ||
format: | ||
uri: edam:format_2330 | ||
outputs: | ||
- name: output_txt_path | ||
required: true | ||
description: Path to the txt file | ||
type: File | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_smiles | ||
required: true | ||
description: The Smiles of small molecules | ||
type: {'type': 'array', 'items': 'string'} | ||
- name: output_pdb_ids | ||
required: true | ||
description: The PDB IDs of target structures | ||
type: {'type': 'array', 'items': 'string'} | ||
ui: | ||
- key: inputs.drugbank_xml_file_path | ||
title: "drugbank_xml_file_path: " | ||
description: "Path to the Drugbank xml file" | ||
type: File | ||
- key: inputs.drugbank_xsd_file_path | ||
title: "drugbank_xsd_file_path: " | ||
description: "Path to the Drugbank schema XSD file" | ||
type: File | ||
- key: inputs.smiles | ||
title: "smiles: " | ||
description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi | ||
title: "inchi: " | ||
description: "List of input InChI strings, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.inchi_keys | ||
title: "inchi_keys: " | ||
description: "List of input InChIKeys, Type string[], File type input, Accepted formats list[string]" | ||
type: ['null', {'type': 'array', 'items': 'string'}] | ||
- key: inputs.output_txt_path | ||
title: "output_txt_path: " | ||
description: "Path to the text dataset file, Type string, File type output, Accepted formats txt" | ||
type: string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
SMILES5443,PDB8720 | ||
SMILES3441,PDB5533 | ||
SMILES2803,PDB3506 |
Oops, something went wrong.