From ebc1066943361a4b1431db9250f461063ecb9221 Mon Sep 17 00:00:00 2001 From: Abel Legese <73869888+Abellegese@users.noreply.github.com> Date: Thu, 21 Nov 2024 14:38:44 +0300 Subject: [PATCH 1/5] Bug fixes: inch_to_key converter args missing (#1391) * Update config.yml - fix Circle CI pipeline * Bug fixes: inch_to_key converter args missing --------- Co-authored-by: Dhanshree Arora --- ersilia/utils/identifiers/compound.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py index a68db6ae6..18c98667b 100644 --- a/ersilia/utils/identifiers/compound.py +++ b/ersilia/utils/identifiers/compound.py @@ -182,7 +182,7 @@ async def process_smiles(self, smiles, semaphore, session, result_list): logger.info("Inchikey converted using PUBCHEM") if inchikey is None: - inchikey = self._nci_smiles_to_inchikey(smiles) + inchikey = self._nci_smiles_to_inchikey(session, smiles) if inchikey: logger.info("Inchikey converted using NCI") From 0d8c24864444bb3eb1ba73f02d5b246bc93778f2 Mon Sep 17 00:00:00 2001 From: musasizivictoria <141638023+musasizivictoria@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:47:35 +0300 Subject: [PATCH 2/5] added tests for is_smile, is_inchikey and guess_type (#1358) Co-authored-by: Dhanshree Arora --- ersilia/utils/identifiers/compound.py | 2 + test/test_compound_identifier.py | 100 +++++++++++++++++++++----- 2 files changed, 83 insertions(+), 19 deletions(-) diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py index 18c98667b..bbe1c44fb 100644 --- a/ersilia/utils/identifiers/compound.py +++ b/ersilia/utils/identifiers/compound.py @@ -56,6 +56,8 @@ def is_key_header(self, h): return h.lower() in self.key_header_synonyms def _is_smiles(self, text): + if not isinstance(text, str) or not text.strip(): + return False if self.Chem is None: return asyncio.run(self._process_pubchem_inchikey(text)) is not None else: diff --git a/test/test_compound_identifier.py b/test/test_compound_identifier.py index 7247a4789..228fa35db 100644 --- a/test/test_compound_identifier.py +++ b/test/test_compound_identifier.py @@ -1,3 +1,5 @@ + +from ersilia.default import UNPROCESSABLE_INPUT import pytest from ersilia.utils.identifiers.compound import CompoundIdentifier from unittest.mock import patch @@ -38,29 +40,89 @@ def test_is_inchikey_positive(compound_identifier, inchikey): """Test that valid InChIKeys return True.""" assert compound_identifier._is_inchikey(inchikey) is True +@pytest.fixture(params=[True, False], ids=["Chem_None", "Chem_Not_None"]) +def compound_identifier(request): + """Fixture that initializes CompoundIdentifier with or without RDKit.""" + return CompoundIdentifier(local=request.param) -@pytest.mark.parametrize("inchikey", [ - "BSYNRYMUTXBXSQUHFFFAOYSA", - "BSYNRYMUTXBXSQ-UHFFFAOYSA-XY", - "12345678901234-1234567890-X", - "BSYNRYMUTXBXSQ_UHFFFAOYSA-N", - "BSYNRYMUTXBXSQ-UHFFFAOYSA" +@pytest.mark.parametrize("smiles, expected", [ + ("C", True), + ("CCO", True) ]) -def test_is_inchikey_negative(compound_identifier, inchikey): - """Test that invalid InChIKeys return False.""" - assert not compound_identifier._is_inchikey(inchikey) +def test_is_smiles_positive(compound_identifier, smiles, expected): + """Test _is_smiles returns True for valid SMILES strings.""" + if compound_identifier.Chem is None: + assert compound_identifier._is_smiles(smiles) == expected - -def test_guess_type_with_inchikey(compound_identifier): - inchikey = "LFQSCWFLJHTTHZ-UHFFFAOYSA-N" +@pytest.mark.parametrize("smiles, expected", [ + ("invalid_smiles", False), + ("", False) +]) +def test_is_smiles_negative(compound_identifier, smiles, expected): + """Test _is_smiles returns False for invalid or empty SMILES strings.""" + assert compound_identifier._is_smiles(smiles) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("BQJCRHHNABKAKU-KBQPJGBKSA-N", True), +]) +def test_is_inchikey_positive(inchikey, expected): + """Test _is_inchikey returns True for valid InChIKey.""" + assert CompoundIdentifier._is_inchikey(inchikey) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("invalid_inchikey", False), + ("BQJCRHHNABKAKU-KBQPJGBKSA", False) +]) +def test_is_inchikey_negative(inchikey, expected): + """Test _is_inchikey returns False for invalid InChIKeys.""" + assert CompoundIdentifier._is_inchikey(inchikey) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("BQJCRHHNABKAKU-KBQPJGBKSA-N", "inchikey"), + ("ABCDEFGHIJKLMN-OPQRSTUVWX-Y", "inchikey"), +]) +def test_guess_type_inchikey(compound_identifier, inchikey, expected): + """Ensure guess_type correctly identifies valid InChIKeys.""" result = compound_identifier.guess_type(inchikey) - assert result == "inchikey" - - -@patch('ersilia.utils.identifiers.compound.CompoundIdentifier._pubchem_smiles_to_inchikey') -def test_is_smiles_positive_chem_none(mock_pubchem, compound_identifier): - compound_identifier.Chem = None - mock_pubchem.return_value = "InChIKey" + assert result == expected, f"Expected 'inchikey', but got '{result}' for input '{inchikey}'" + +@pytest.mark.parametrize("smiles, expected", [ + ("C", "smiles"), + ("CCO", "smiles"), +]) +def test_guess_type_smiles(compound_identifier, smiles, expected): + """Ensure guess_type correctly identifies valid SMILES strings.""" + result = compound_identifier.guess_type(smiles) + assert result == expected, f"Expected 'smiles', but got '{result}' for input '{smiles}'" + +@pytest.mark.parametrize("input_data, expected", [ + (None, UNPROCESSABLE_INPUT), + (UNPROCESSABLE_INPUT, UNPROCESSABLE_INPUT), +]) +def test_guess_type_unprocessable(compound_identifier, input_data, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for None or unprocessable inputs.""" + result = compound_identifier.guess_type(input_data) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}', but got '{result}'" + +@pytest.mark.parametrize("whitespace_input, expected", [ + ("\n", UNPROCESSABLE_INPUT), + ("\t", UNPROCESSABLE_INPUT), + (" ", UNPROCESSABLE_INPUT), +]) +def test_guess_type_whitespace(compound_identifier, whitespace_input, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for whitespace-only input.""" + result = compound_identifier.guess_type(whitespace_input) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{whitespace_input}'" + +@pytest.mark.parametrize("non_char_input, expected", [ + (12345, UNPROCESSABLE_INPUT), + (3.14, UNPROCESSABLE_INPUT), + ("𠜎𠜱𡿺𠬠", UNPROCESSABLE_INPUT), +]) +def test_guess_type_non_character(compound_identifier, non_char_input, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for non-character input.""" + result = compound_identifier.guess_type(non_char_input) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{non_char_input}'" # Test with a valid SMILES input smiles_string = 'CCO' #Ethanol SMILES From 166ba761d301893e693139bb2f2da6eced128158 Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Thu, 21 Nov 2024 22:09:59 +0530 Subject: [PATCH 3/5] Update versions (#1396) --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0e82b9eed..d82e20077 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,10 +50,10 @@ docker = "^7.1.0" boto3 = "^1.28.40" requests = "^2.31.0" numpy = "<=1.26.4" -setuptools = "^65.0.0" # added to fix the issue with setuptools +setuptools = "^70.0.0" # added to fix the issue with setuptools isaura = { version = "0.1", optional = true } aiofiles = "<=24.1.0" -aiohttp = "<=3.10.9" +aiohttp = ">=3.10.11" nest_asyncio = "<=1.6.0" pytest = { version = "^7.4.0", optional = true } pytest-asyncio = { version = "<=0.24.0", optional = true } From 933f1cbb32a5ae3cd6f9a581fbfa3b736b4101d8 Mon Sep 17 00:00:00 2001 From: snufkinwa <68300416+snufkinwa@users.noreply.github.com> Date: Fri, 22 Nov 2024 00:17:13 -0600 Subject: [PATCH 4/5] Remove duplicate flag --repo_path from the fetch command (#1370) (#1381) --- ersilia/cli/commands/fetch.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ersilia/cli/commands/fetch.py b/ersilia/cli/commands/fetch.py index 7ded3cdac..ac77b412c 100644 --- a/ersilia/cli/commands/fetch.py +++ b/ersilia/cli/commands/fetch.py @@ -23,7 +23,6 @@ def _fetch(mf, model_id): "an EOS folder, then packed to a BentoML bundle", ) @click.argument("model", type=click.STRING) - @click.option("--repo_path", "-r", default=None, type=click.STRING) @click.option("--mode", "-m", default=None, type=click.STRING) @click.option("--dockerize/--not-dockerize", default=False) @click.option( @@ -78,7 +77,6 @@ def _fetch(mf, model_id): ) def fetch( model, - repo_path, mode, dockerize, overwrite, @@ -93,11 +91,9 @@ def fetch( ): if with_bentoml and with_fastapi: raise Exception("Cannot use both BentoML and FastAPI") - if repo_path is not None: - mdl = ModelBase(repo_path=repo_path) - elif from_dir is not None: + + if from_dir is not None: mdl = ModelBase(repo_path=from_dir) - repo_path = from_dir else: mdl = ModelBase(model_id_or_slug=model) model_id = mdl.model_id @@ -106,7 +102,7 @@ def fetch( fg="blue", ) mf = ModelFetcher( - repo_path=repo_path, + repo_path=from_dir, mode=mode, dockerize=dockerize, overwrite=overwrite, From 98618b70ca49feff46d432b6a1630e2f99482b65 Mon Sep 17 00:00:00 2001 From: dhanshreea Date: Fri, 22 Nov 2024 12:29:26 +0530 Subject: [PATCH 5/5] fix circle ci pipeline --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cd5a68984..4082e6d7b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,7 +38,7 @@ jobs: name: Fetch model command: | source activate ersilia - ersilia -v fetch eos0t01 --repo_path ./test/models/eos0t01 + ersilia -v fetch eos0t01 --from_dir ./test/models/eos0t01 - run: name: Delete model command: |