Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pre-process #118

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions pre-process/download-data/download_pdb.wic
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Workflow-level inputs for the PDB download workflow.
inputs:
  # Wired into the `config_tag_pdb` step's `pdb_id` input in the `steps` section.
  pdb_id:
    type: string
    # NOTE(review): edam:format_1476 is presumably the EDAM "PDB" format term --
    # confirm against the EDAM ontology.
    format:
      - edam:format_1476

# Pipeline steps: config_tag_pdb -> pdb -> extract_model -> python_script
# (numbered 1-4 in the `wic` metadata section of this file).
#
# NOTE(review): the custom tags appear to follow the workflow compiler's
# conventions -- `!ii` marks an inline literal value, `!& name` names an output
# and `!* name` consumes a previously named output. Confirm against the
# compiler's documentation.
steps:
  config_tag_pdb:
    in:
      pdb_id: pdb_id
      #filter: "False" # False = do not 'clean' the pdb file.
    out:
      - output_config_string: !& pdb_id_str
  pdb:
    in:
      output_pdb_path: !ii protein_models.pdb
      # Consumes the config string produced by `config_tag_pdb` above.
      config: !* pdb_id_str
      # Alternative hard-coded PDB codes, kept for reference:
      #config: !ii {pdb_code: 1aki} # from tutorial
      #config: !ii {pdb_code: 1enh} # "Structural Studies of the Engrailed Homeodomain" https://pubs.acs.org/doi/10.1021/acs.jpcb.8b02144

      # Chignolin model mini-protein
      #config: !ii {pdb_code: 1uao} # "10 residue folded peptide designed by segment statistics" http://dx.doi.org/10.1016/j.str.2004.05.022

      # Trp-cage model mini-protein (use 1l2y; 2m7d 2m7c 6d37 have chainbreaks and 6d37 is a hexamer)
      #config: !ii {pdb_code: 1l2y} # "Designing a 20-residue protein." http://dx.doi.org/10.1038/nsb798
      #config: !ii {pdb_code: 2m7d} # "Folding Dynamics and Pathways of the Trp-Cage Miniproteins" https://doi.org/10.1021/bi501021r
      #config: !ii {pdb_code: 2m7c} # "Circular Permutation of the Trp-cage: Fold Rescue upon Addition of a Hydrophobic Staple" http://dx.doi.org/10.1039/C3RA43674H
      #config: !ii {pdb_code: 6d37} # "Trp-cage tr16b R16Nva : Elimination of pH Dependent Interactions" https://doi.org/10.1002/bip.23260

      # Other model mini-proteins
      #config: !ii {pdb_code: 5kwp} # "Accurate de novo design of hyperstable constrained peptides." http://dx.doi.org/10.1038/nature19791
      #config: !ii {pdb_code: 6b17} # "Design of a short thermally stable alpha-helix embedded in a macrocycle" http://dx.doi.org/10.1002/cbic.201800026
      #config: !ii {pdb_code: 1bzv} # "The solution structure of a superpotent B-chain-shortened single-replacement insulin analogue." http://dx.doi.org/10.1110/ps.8.3.499

      # Trypsin
      #config: !ii {pdb_code: 1trn} # Dimer. This does not download residue 151, but residue 151 is in the pdb file if you download it using a web browser! ???
      #config: !ii {pdb_code: 1ntp}
      #config: !ii {pdb_code: 1bty} # "IndexError: list index out of range" in str_check_add_hydrogens
  extract_model:
    in:
      # Inline config mapping: keep only model 1.
      config: !ii
        models: [1]
    # out:
    #   - output_structure_path: !& protein.pdb
  python_script:
    in:
      script: !ii ../scripts/atomselect.py
      dockerPull: !ii jakefennick/atomselect
      # Remove any unknown residues that weren't already 'cleaned' in the `pdb` step above.
      selection_string: !ii not resname UNK
    out:
      - output_pdb_path: !& protein.pdb

# Display metadata: Graphviz labels for the workflow as a whole and for each
# step. Step keys have the form `(<number>, <step name>)` and match the entries
# of the `steps` section.
#
# `\n` stays a literal backslash-n in plain and single-quoted YAML scalars;
# presumably Graphviz renders it as a line break inside the label.
wic:
  graphviz:
    label: Molecular\nModeling
  steps:
    (1, config_tag_pdb):
      wic:
        graphviz:
          label: 'Specify PDB Code'
    (2, pdb):
      wic:
        graphviz:
          label: 'Download and Clean\nPDB File'
    (3, extract_model):
      wic:
        graphviz:
          label: 'Extract First Model'
    (4, python_script):
      wic:
        graphviz:
          label: 'Remove Unknown Residues'
52 changes: 52 additions & 0 deletions pre-process/download-data/download_smiles_ligand_db.wic
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Workflow-level inputs for the SMILES ligand database workflow.
# `path` feeds the `wget_xlsx` step's `url`; the remaining inputs are passed
# through under the same names to the `generate_conformers` step.
inputs:
  path:
    type: string
  query:
    type: string
  max_row:
    type: int
  smiles_column:
    type: string
  binding_data_column:
    type: string
  convert_Kd_dG:
    type: boolean
  output_txt_path:
    type: string

# Workflow-level outputs.
outputs:
  # File taken from the `generate_conformers` step's `output_txt_path`.
  output_txt_path:
    type: File
    # NOTE(review): edam:format_2330 is presumably the EDAM "textual format"
    # term -- confirm against the EDAM ontology.
    format: edam:format_2330
    # NOTE(review): the `<workflow>__step__<n>__<step>` prefix looks like a
    # compiler-generated step id -- confirm it matches the compiled workflow.
    outputSource: download_smiles_ligand_db__step__2__generate_conformers/output_txt_path

# Pipeline steps: wget_xlsx -> generate_conformers
# (numbered 1-2 in the `wic` metadata section of this file).
steps:
  wget_xlsx:
    in:
      # Bound to the workflow-level `path` input.
      url: path
  generate_conformers:
    in:
      #input_excel_path: # inferred
      # query syntax: `column name` 'column value'
      # Each value below is bound to the workflow-level input of the same name;
      # the trailing comments record example literal values.
      query: query  #"`Standard Type` == 'Kd' and `duplicate-type-classifier` == 'unique'"
      max_row: max_row  #1 #25 # Use 1 for CI
      smiles_column: smiles_column  #SMILES
      binding_data_column: binding_data_column  #Standard Value
      convert_Kd_dG: convert_Kd_dG  #'True'
      output_txt_path: output_txt_path
      output_sdf_path: !ii ligand_conformers.sdf
    out:
      - output_sdf_path: !& ligand_conformers.sdf

# Display metadata: Graphviz labels for the workflow as a whole and for each
# step. Step keys have the form `(<number>, <step name>)` and match the entries
# of the `steps` section.
#
# `\n` stays a literal backslash-n in a plain YAML scalar; presumably Graphviz
# renders it as a line break inside the label.
wic:
  graphviz:
    label: Download Smiles\nLigand Database
  steps:
    (1, wget_xlsx):
      wic:
        graphviz:
          label: Download Excel File
    (2, generate_conformers):
      wic:
        graphviz:
          label: Query Spreadsheet\nGenerate Conformers
Loading