Skip to content

Commit

Permalink
Merge pull request #81 from Steinbeck-Lab/development
Browse files (browse the repository at this point in the history)
Development
  • Loading branch information
Kohulan authored Mar 11, 2024
2 parents 3084655 + 8e8a5d8 commit 67f7841
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 186 deletions.
2 changes: 1 addition & 1 deletion CIDER/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
e.g. size, overlap, molecular descriptor distributions, chemical space clustering, etc., most of which can be visually inspected in the notebook.
"""

__version__ = "0.0.1-dev"
__version__ = "1.0.0"

__all__ = [
"CIDER",
Expand Down
70 changes: 43 additions & 27 deletions CIDER/cider.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,59 +295,75 @@ def import_smi_as_data_dict(self, path_to_data: str):
all_dicts = {}
data_dir = os.path.abspath(str(path_to_data))
for dict_name in os.listdir(data_dir):
if (
dict_name[-3:] == "smi"
or dict_name[-3:] == "SMI"
or dict_name[-3:] == "txt"
):
if dict_name.lower().endswith((".smi", ".txt")):
single_dict = {}
dict_path = os.path.join(data_dir, dict_name)

try:
smi_table = pd.read_csv(
dict_path, sep=None, engine="python", header=None
)
except ParserError:
smi_table = pd.read_csv(dict_path, header=None)

for column in range(len(smi_table.columns)):
is_mol = []
for row in range(3):
is_mol.append((Chem.MolFromSmiles(smi_table[column][row])))
if any(is_mol):
is_mol = any(
Chem.MolFromSmiles(smi_table[column][row]) is not None
for row in range(3)
)
if is_mol:
smi_column = column
break

rdkit_mol_list = []
id_column_mapping = {
0: 1,
1: 0,
} # Mapping for ID column based on SMILES column

for mol in smi_table[smi_column]:
rdkit_mol_list.append(Chem.MolFromSmiles(mol))
molecule = Chem.MolFromSmiles(mol)
if molecule:
AllChem.Compute2DCoords(molecule)
rdkit_mol_list.append(molecule)

single_dict[self.import_keyname] = rdkit_mol_list
all_dicts[dict_name] = single_dict

if id:
try:
if smi_column == 1:
id_column = 0
elif smi_column == 0:
id_column = 1
id_list = list(smi_table[id_column])
single_dict[self.database_id_keyname] = id_list
id_column = id_column_mapping.get(smi_column)
if id_column is not None:
id_list = list(smi_table[id_column])
single_dict[self.database_id_keyname] = id_list
else:
raise KeyError("Invalid SMILES column specified.")
except KeyError:
logger.info(
"Cannot find IDs for file %s! SMILES strings and database ID should be the first a d second entry of the files to import the ID."
% (dict_name)
f"Cannot find IDs for file {dict_name}! "
"SMILES strings and database ID should be the first and second entries of the files to import the ID."
)
continue

if not all_dicts:
raise KeyError("No SMI files found in the given directory %s!" % (data_dir))
raise KeyError(f"No *.smi files found in the given directory {data_dir}!")

figure_dict = {}
all_dicts[self.figure_dict_keyname] = figure_dict

self._check_invalid_mols_in_SDF(all_dicts)

logger.info("Created dictionary with keys: %s", list(all_dicts.keys()))

os.chdir(os.path.dirname(data_dir))
if not os.path.exists("output"):
os.mkdir("output")
else:
if os.listdir("output"):
logger.warning(
"Already existing output folder with files! Old data will be overwritten!"
)

output_dir = os.path.join(os.getcwd(), "output")
if not os.path.exists(output_dir):
os.mkdir(output_dir)
elif os.listdir(output_dir):
logger.warning(
"Already existing output folder with files! Old data will be overwritten!"
)

return all_dicts

# Section: Saving figures and images
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ authors:
given-names: "Christoph"
orcid: "https://orcid.org/0000-0001-6966-0814"
title: "ChemIcal DatasEt comparatoR CIDER"
version: 0.0.1-dev
version: 1.0.0
doi: 10.5281/??
date-released: 2022.06.09
url: "https://github.com/hannbus/ChemIcal_DatasEt_compaRator"
Expand Down
293 changes: 138 additions & 155 deletions Workflow/CIDER_Workflow.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
bokeh==2.4.3
chemplot==1.2.0
chemplot @ git+https://github.com/Kohulan/ChemPlot.git@main#egg=chemplot
fpdf2==2.6.0
IPython
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name="cider-chem",
version="0.0.1",
version="1.0.0",
author="Hannah Busch",
author_email="[email protected]",
maintainer="Jonas Schaub, Otto Brinkhaus, Kohulan Rajan",
Expand Down

0 comments on commit 67f7841

Please sign in to comment.