From e3510ee710c10177adc7e199b82c8bcc6596b59c Mon Sep 17 00:00:00 2001 From: Abel Legese <73869888+Abellegese@users.noreply.github.com> Date: Mon, 23 Dec 2024 13:47:51 +0300 Subject: [PATCH 01/11] Update ` model.py` python code to enable rendering in sphinx docs (#1452) * Merge sample command with the example command (#1422) * Merge sample command with the example command * Fix example command usage * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Some import cleanup * Add detailed docstrings and usage examples to Ersilia'a publish module and few update on the utils module (#1448) * Merge sample command with the example command (#1422) * Merge sample command with the example command * Fix example command usage * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Some import cleanup * Few code fixes * Few code fixes * Add detailed docstrings and usage examples to Ersilia'a utils module * Add detailed docstrings and 
usage examples to Ersilia'a utils module * Merge sample command with the example command (#1422) * Merge sample command with the example command * Fix example command usage * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Some import cleanup --------- Co-authored-by: Dhanshree Arora * fix: correcting the docstring in model.py to be rendered in sphinx doc page --------- Co-authored-by: Dhanshree Arora --- ersilia/core/model.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ersilia/core/model.py b/ersilia/core/model.py index 43d6da7a0..a137be80e 100644 --- a/ersilia/core/model.py +++ b/ersilia/core/model.py @@ -77,25 +77,29 @@ class ErsiliaModel(ErsiliaBase): Examples -------- - Fetching a model: + Fetching a model this requires to use asyncio since `fetch` is a coroutine.: + .. code-block:: python model = ErsiliaModel(model="model_id") model.fetch() Serving a model: + .. code-block:: python model = ErsiliaModel(model="model_id") model.serve() Running a model: + .. code-block:: python model = ErsiliaModel(model="model_id") result = model.run(input="input_data.csv", output="output_data.csv") Closing a model: + .. 
code-block:: python model = ErsiliaModel(model="model_id") From 2961dfc3a2c543428c57afd24cf3b3dd84c55a54 Mon Sep 17 00:00:00 2001 From: Abel Legese <73869888+Abellegese@users.noreply.github.com> Date: Thu, 26 Dec 2024 12:05:38 +0300 Subject: [PATCH 02/11] Legacy bentoml dockerfile python version upgrade (#1469) * Merge sample command with the example command (#1422) * Merge sample command with the example command * Fix example command usage * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Some import cleanup * fix: async package python version incompatibility in docker image fix. 
--------- Co-authored-by: Dhanshree Arora --- dockerfiles/dockerize-legacy-bentoml/base/Dockerfile | 2 +- dockerfiles/dockerize-multistage-condapack/base/Dockerfile | 2 +- dockerfiles/dockerize-multistage-condapack/model/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dockerfiles/dockerize-legacy-bentoml/base/Dockerfile b/dockerfiles/dockerize-legacy-bentoml/base/Dockerfile index 8fbdf2087..33d5c59f1 100644 --- a/dockerfiles/dockerize-legacy-bentoml/base/Dockerfile +++ b/dockerfiles/dockerize-legacy-bentoml/base/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-slim-buster +FROM python:3.10-slim-buster WORKDIR /root ARG BUILD_MODEL diff --git a/dockerfiles/dockerize-multistage-condapack/base/Dockerfile b/dockerfiles/dockerize-multistage-condapack/base/Dockerfile index d366871ca..18068e2e5 100644 --- a/dockerfiles/dockerize-multistage-condapack/base/Dockerfile +++ b/dockerfiles/dockerize-multistage-condapack/base/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.7-slim-buster +FROM python:3.10-slim-buster WORKDIR /root ARG BUILD_MODEL diff --git a/dockerfiles/dockerize-multistage-condapack/model/Dockerfile b/dockerfiles/dockerize-multistage-condapack/model/Dockerfile index e9bc855c7..dc4d3e7b1 100644 --- a/dockerfiles/dockerize-multistage-condapack/model/Dockerfile +++ b/dockerfiles/dockerize-multistage-condapack/model/Dockerfile @@ -14,7 +14,7 @@ RUN /$MODEL/bin/conda-unpack # the ersilia environment, the model environment, # and the model itself (as a bentoml bundle) -FROM python:3.7-slim-buster +FROM python:3.10-slim-buster WORKDIR /root ARG MODEL=eos_identifier ENV MODEL=$MODEL From 0fe2fb408e9fcf40fea9afdd7e376b575f863ceb Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Thu, 26 Dec 2024 17:52:09 +0530 Subject: [PATCH 03/11] Raise error if model directory doesn't exist (#1471) --- ersilia/core/modelbase.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ersilia/core/modelbase.py b/ersilia/core/modelbase.py index 
b1ef75da9..90a3541d5 100644 --- a/ersilia/core/modelbase.py +++ b/ersilia/core/modelbase.py @@ -6,7 +6,6 @@ from ..hub.fetch import STATUS_FILE, DONE_TAG from ..default import IS_FETCHED_FROM_DOCKERHUB_FILE from ..utils.paths import get_metadata_from_base_dir - from ..utils.exceptions_utils.exceptions import InvalidModelIdentifierError from .. import throw_ersilia_exception @@ -47,8 +46,12 @@ def __init__(self, model_id_or_slug=None, repo_path=None, config_json=None): raise InvalidModelIdentifierError(model=self.text) if repo_path is not None: - self.logger.debug("Repo path specified: {0}".format(repo_path)) - self.logger.debug("Absolute path: {0}".format(os.path.abspath(repo_path))) + self.logger.debug(f"Repo path specified: {repo_path}") + abspath = os.path.abspath(repo_path) + self.logger.debug(f"Absolute path: {abspath}") + # Check if path actually exists + if not os.path.exists(abspath): + raise FileNotFoundError("Model directory does not exist at the provided path. Please check the path and try again.") self.text = self._get_model_id_from_path(repo_path) self.model_id = self.text slug = self._get_slug_if_available(repo_path) From ad004a3093292206422978871d5453163a1da546 Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Thu, 26 Dec 2024 19:18:56 +0530 Subject: [PATCH 04/11] access the correct index to get model identifier (#1472) --- ersilia/cli/commands/delete.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/ersilia/cli/commands/delete.py b/ersilia/cli/commands/delete.py index 8875a155c..a52593d8e 100644 --- a/ersilia/cli/commands/delete.py +++ b/ersilia/cli/commands/delete.py @@ -57,6 +57,7 @@ def _delete_all(): model_catalog = ModelCatalog() catalog_table = model_catalog.local() local_models = catalog_table.data if catalog_table else None + idx = catalog_table.columns.index("Identifier") if not local_models: echo( ":person_tipping_hand: No models are available locally for deletion.", @@ -65,11 +66,17 @@ def 
_delete_all(): return deleted_count = 0 for model_row in local_models: - model_id = model_row[0] - if _delete_model_by_id(model_id): + model_id = model_row[idx] + try: + _delete_model_by_id(model_id) deleted_count += 1 + except Exception as e: + echo( + f":warning: Error deleting model {model_id}: {e}", + fg="red", + ) echo( - ":thumbs_up: Completed the deletion of all locally available models!", + f":thumbs_up: Completed the deletion of {deleted_count} locally available models!", fg="green", ) From b3112b32def0684d1bc55fbbea9c5607dc7eb60e Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Mon, 30 Dec 2024 10:24:16 +0530 Subject: [PATCH 05/11] track served models with their session info so if they are deleted while being served, we can safely remove their session data (#1474) --- ersilia/cli/commands/close.py | 3 +- ersilia/cli/commands/serve.py | 3 ++ ersilia/core/session.py | 4 +- ersilia/hub/delete/delete.py | 5 +++ ersilia/utils/session.py | 75 ++++++++++++++++++++++++++++++++++- 5 files changed, 85 insertions(+), 5 deletions(-) diff --git a/ersilia/cli/commands/close.py b/ersilia/cli/commands/close.py index 0856a25c4..584bfef56 100644 --- a/ersilia/cli/commands/close.py +++ b/ersilia/cli/commands/close.py @@ -4,7 +4,7 @@ from .. import echo from ... import ErsiliaModel from ...core.session import Session - +from ...utils.session import deregister_model_session def close_cmd(): """ @@ -35,6 +35,7 @@ def close(): return mdl = ErsiliaModel(model_id, service_class=service_class) mdl.close() + deregister_model_session(model_id) echo(":no_entry: Model {0} closed".format(mdl.model_id), fg="green") return close diff --git a/ersilia/cli/commands/serve.py b/ersilia/cli/commands/serve.py index d92e94edb..be6a5af03 100644 --- a/ersilia/cli/commands/serve.py +++ b/ersilia/cli/commands/serve.py @@ -5,6 +5,7 @@ from ... 
import ErsiliaModel from ..messages import ModelNotFound from ...store.utils import OutputSource, ModelNotInStore, store_has_model +from ...utils.session import register_model_session def serve_cmd(): @@ -76,6 +77,8 @@ def serve(model, output_source, lake, port, track): if mdl.url is None: echo("No URL found. Service unsuccessful.", fg="red") return + + register_model_session(mdl.model_id, mdl.session._session_dir) echo( ":rocket: Serving model {0}: {1}".format(mdl.model_id, mdl.slug), fg="green" ) diff --git a/ersilia/core/session.py b/ersilia/core/session.py index e077f99bf..2d0e14011 100644 --- a/ersilia/core/session.py +++ b/ersilia/core/session.py @@ -26,8 +26,8 @@ class Session(ErsiliaBase): """ def __init__(self, config_json): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) - session_dir = get_session_dir() - self.session_file = os.path.join(session_dir, SESSION_JSON) + self._session_dir = get_session_dir() + self.session_file = os.path.join(self._session_dir, SESSION_JSON) def current_model_id(self): """ diff --git a/ersilia/hub/delete/delete.py b/ersilia/hub/delete/delete.py index 551a356e5..2a933807c 100644 --- a/ersilia/hub/delete/delete.py +++ b/ersilia/hub/delete/delete.py @@ -16,6 +16,7 @@ from ..bundle.status import ModelStatus from ...default import ISAURA_FILE_TAG, ISAURA_FILE_TAG_LOCAL +from ...utils.session import get_model_session, remove_session_dir, deregister_model_session def rmtree(path): @@ -573,6 +574,10 @@ def can_be_deleted(self, model_id: str) -> Tuple[bool, str]: bool True if the model can be deleted, False otherwise. 
""" + mdl_session = get_model_session(model_id) + if mdl_session: + remove_session_dir(mdl_session) + deregister_model_session(model_id) needs_delete = self._needs_delete(model_id) mc = ModelCard(config_json=self.config_json).get(model_id) model_source = ModelCatalog(config_json=self.config_json)._get_model_source(mc) diff --git a/ersilia/utils/session.py b/ersilia/utils/session.py index 8d955c5c1..09b72cba1 100644 --- a/ersilia/utils/session.py +++ b/ersilia/utils/session.py @@ -3,7 +3,7 @@ import psutil import json -from ..default import SESSIONS_DIR, LOGS_DIR, CONTAINER_LOGS_TMP_DIR, SESSION_JSON +from ..default import SESSIONS_DIR, LOGS_DIR, CONTAINER_LOGS_TMP_DIR, SESSION_JSON, EOS, MODELS_JSON def get_current_pid(): @@ -93,7 +93,8 @@ def remove_session_dir(session_name): The name of the session. """ session_dir = os.path.join(SESSIONS_DIR, session_name) - shutil.rmtree(session_dir) + if os.path.exists(session_dir): + shutil.rmtree(session_dir) def determine_orphaned_session(): @@ -140,3 +141,73 @@ def get_session_id(): The session ID. """ return f"session_{get_parent_pid()}" + + +def register_model_session(model_id, session_dir): + """ + Register a model with a session. + + Parameters + ---------- + model_id : str + The model ID. + session_dir : str + The session directory. + """ + file_path = os.path.join(EOS, MODELS_JSON) + + if not os.path.exists(file_path): + with open(file_path, "w") as f: + json.dump({}, f, indent=4) + + with open(file_path, "r") as f: + models = json.load(f) + + if ( + model_id not in models + ): # TODO This would have implications when we try to run the same model across multiple sessions + models[model_id] = session_dir + with open(file_path, "w") as f: + json.dump(models, f, indent=4) + + +def get_model_session(model_id): + """ + Get the model session. + + Parameters + ---------- + model_id : str + The model ID. + + Returns + ------- + str + The session ID. 
+ """ + file_path = os.path.join(EOS, MODELS_JSON) + if not os.path.exists(file_path): + return None + with open(file_path, "r") as f: + models = json.load(f) + return models.get(model_id, None) + + +def deregister_model_session(model_id): + """ + Remove a model from a session. + + Parameters + ---------- + model_id : str + The model ID. + """ + file_path = os.path.join(EOS, MODELS_JSON) + if not os.path.exists(file_path): + return + with open(file_path, "r") as f: + models = json.load(f) + if model_id in models: + del models[model_id] + with open(file_path, "w") as f: + json.dump(models, f, indent=4) \ No newline at end of file From 99a0c310fa8217812e715e8354c0053d2a9e49ab Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Mon, 30 Dec 2024 10:35:24 +0530 Subject: [PATCH 06/11] Model incorporation with llms (#1475) * more modifications * scripts to process model metadata and publications * bugfix suggest metadata with llm * Post process response into JSON format for downstream consumption * Added format option to allow serializing to either file format; util function to serialize between json to md * Minor fixes with dumping json response --------- Co-authored-by: Miquel Duran-Frigola --- .github/scripts/suggest_metadata_with_llm.py | 338 +++++++++++++++ .../scripts/summarize_publication_with_llm.py | 409 ++++++++++++++++++ 2 files changed, 747 insertions(+) create mode 100644 .github/scripts/suggest_metadata_with_llm.py create mode 100644 .github/scripts/summarize_publication_with_llm.py diff --git a/.github/scripts/suggest_metadata_with_llm.py b/.github/scripts/suggest_metadata_with_llm.py new file mode 100644 index 000000000..ac996915c --- /dev/null +++ b/.github/scripts/suggest_metadata_with_llm.py @@ -0,0 +1,338 @@ +import os +import argparse +import openai +import requests +import boto3 +import tempfile +import shutil +from dotenv import load_dotenv + +load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +openai.api_key = OPENAI_API_KEY +MODEL_NAME = 
"gpt-4o" + + +def read_accepted_labels(category): + category = category.lower().replace(" ", "_") + base_url = "https://raw.githubusercontent.com/ersilia-os/ersilia/refs/heads/master/ersilia/hub/content/metadata/" + url = os.path.join(base_url, category + ".txt") + response = requests.get(url) + if response.status_code == 200: + values = response.text.splitlines() + return [x.strip() for x in values if x] + else: + raise Exception(f"Failed to download labels from {url}") + + +accepted = {} +for category in [ + "input", + "input_shape", + "license", + "mode", + "output", + "output_shape", + "output_type", + "tag", + "task", +]: + accepted[category] = ", ".join(read_accepted_labels(category)) + + +PRIMARY_SYSTEM_PROMPT = """ +You are a biomedical expert and you are asked to annotate metadata for a given computational tool, for example an AI/ML model. +You will be given the following information: +1. A structured report (summary) of a publication. This will be labelled by the user as PUBLICATION REPORT. +2. Some raw and potentially spurious metadata in JSON, YAML or Markdown format. This will be labeled by the user as RAW METADATA. + +Your task is to provide a structured metadata report for the computational tool based on the information provided. +The metadata report should include the following information, in Markdown format: + +# General Information +- Title: Suggest a title for the computational tool. This should be a concise and informative title. It should be coherent with the title of the publication, and it can be inspired by the title in the raw metadata. The title should not be longer than 100 characters. +- Slug: Just take the slug from the publication summary. + +# Description +Write a short summary of the computational tool. This should be a high-level overview of the tool based on the publication summary provided. +You an also get inspiration from the user prompt. +The description should have at least 200 characters. Strictly one paragraph. 
+ +# Tags +Select 1-5 tags from this list: {7} +Give the tags as a list: +- Tag 1 +- Tag 2 +- etc. + +# Input +- Input: One of the following, depending on the type of input accepted for the model: {0} +- Input shape: One of the following, depending on the characteristics of the minimum model input. Typically, models accept a single input. Models that require multiple inputs to do predictions, can be lists, etc.: {1} + +# Output +- Output: One (ideally) or multiple of the following (comma-separated), depending on the type of output produced by the model: {2} +- Output shape: One of the following, depending on the dimensionality of the produced output: {3} +- Output type: One of the following: {4} + +# Mode and Task +- Mode: One of the following, depending on whether the model is already pre-trained by authors, retrained by our team, or build from the data with our own tools: {5} +- Task: AI, ML or data science task. One of the following: {6} + +# Interpretation +Provide a oneliner explaining how to interpret the output of the model. Is it a probability? Is it a regression value for a particular experimental assay? etc. No more than 150 characters. + +# Publication and Code +Provide a brief summary of the publication. You can use the TLDR from the publication report and the publication summary. +This should be a high-level overview of the publication. Between 50 and 100 words. Only one paragraph. No new-line characters. + +In addition, provide the following URLs as a list: +- Publication URL: Provide the URL of the publication. This is exactly the URL provided in the metadata from the user. +- Code URL: Provide the URL of the code repository. This is exactly the URL provided in the metadata from the user. + +# License +- Simply extract the license from the metadata. + +-- + +Below are some general guidelines: +- Produce a Markdown file strictly following the headers and formatting specified above. +- Never do multiple paragraphs. +- Be concise. 
+- Do not include any special characters, boldface, or italics. +""".format( + accepted["input"], + accepted["input_shape"], + accepted["output"], + accepted["output_shape"], + accepted["output_type"], + accepted["mode"], + accepted["task"], + accepted["tag"], +) + + +PRIMARY_USER_PROMPT = """ +Write the metadata of the following computational tool. + +RAW METADATA + + +PUBLICATION REPORT + +""" + +POSTPROCESS_SYSTEM_PROMPT = """ +Your task is to make sure the format of a Markdown file is correct. You can slightly modify the content, but only if you find +inconsistencies in the text, repetitions, or incoherences. Avoid boldface, italics and special characters. +Strictly follow the format below: +# General information +- Title: The title. No longer than 100 characters. +- Slug: Lower case, can use hyphenation. + +# Description +At least 200 characters, ideally more. Strictly one paragraph. + +# Tags +- Tag 1 +- Tag 2 +- etc. 3 to 5 tags. + +# Input +- Input: As specified in the source file. Do not modify. +- Input shape: As specified in the source file. Do not modify. + +# Output +- Output: As specified in the source file. Do not modify. +- Output shape: As specified in the source file. Do not modify. +- Output type: As specified in the source file. Do not modify. + +# Mode and Task +- Mode: As specified in the source file. Do not modify. +- Task: As specified in the source file. Do not modify. + +# Interpretation +A one-liner. No more than 150 characters. Strictly one paragraph. + +# Publication and Code +A summary as specified in the source file, between 50 and 100 words. Strictly one paragraph. + +- Publication: A URL with the link []() +- Code: A URL with the link []() + +# License +- As specified in the source file. Do not modify. 
+""" + +POSTPROCESS_USER_PROMPT = """ +Process the following text: +""" + + +class MetadataDownloader(object): + def __init__( + self, + model_id=None, + issue_number=None, + ): + if model_id is None and issue_number is None: + raise Exception("At least one argument is necessary") + if model_id is not None and issue_number is not None: + model_id = None + self.model_id = model_id + self.issue_number = issue_number + self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-") + self.output_file = os.path.join(self.tmp_folder, "metadata.txt") + + def download_by_model_id(self): + base_url = ( + "https://raw.githubusercontent.com/ersilia-os/{0}/refs/heads/main/".format( + self.model_id + ) + ) + metadata_yml = os.path.join(base_url, "metadata.yml") + metadata_json = os.path.join(base_url, "metadata.json") + try: + url = metadata_yml + response = requests.get(url) + if response.status_code == 200: + with open(self.output_file, "wb") as f: + f.write(response.content) + return self.output_file + except: + pass + try: + url = metadata_json + response = requests.get(url) + if response.status_code == 200: + with open(self.output_file, "wb") as f: + f.write(response.content) + return self.output_file + except: + raise Exception("Metadata not found in model repository") + + def download_by_issue_number(self): + # TODO extract metadata from the issue itself + pass + + def download(self): + if self.model_id is not None: + return self.download_by_model_id() + if self.issue_number is not None: + return self.download_by_issue_number() + return None + + +class PublicationSummaryDownloader(object): + def __init__(self, model_id=None, file_path=None): + if model_id is None and file_path is None: + raise Exception("At least one argument is necessary") + if model_id is not None and file_path is not None: + model_id = None + self.model_id = model_id + self.file_path = file_path + self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-") + self.output_file = os.path.join(self.tmp_folder, 
"input_file.txt") + + def download_by_model_id(self): + s3 = boto3.client("s3") + bucket_name = "publication-summaries" # TODO check path in S3 + key = f"{self.model_id}_summary.md" + try: + with open(self.output_file, "wb") as f: + s3.download_fileobj(bucket_name, key, f) + return self.output_file + except Exception as e: + raise Exception(f"Failed to download file from S3: {str(e)}") + + def download_from_file_path(self): + shutil.copy(self.file_path, self.output_file) + return self.output_file + + def download(self): + if self.model_id is not None: + return self.download_by_model_id() + if self.file_path is not None: + return self.download_from_file_path() + return None + + +class MetadataSuggestor(object): + def __init__(self, metadata_txt, publication_markdown, output_markdown): + self.model_name = MODEL_NAME + self.metadata_txt = metadata_txt + self.publication_markdown = publication_markdown + self.output_markdown = output_markdown + + def make_primary_request(self): + with open(self.metadata_txt, "r") as f: + metadata_txt = f.read() + with open(self.publication_markdown, "r") as f: + publication_markdown = f.read() + system_prompt = PRIMARY_SYSTEM_PROMPT + user_prompt = PRIMARY_USER_PROMPT.strip() + user_prompt = user_prompt.replace("", metadata_txt) + user_prompt = user_prompt.replace("", publication_markdown) + response = openai.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.7, + ) + return response.choices[0].message.content + + def postprocess_markdown(self, text, output_md): + system_prompt = POSTPROCESS_SYSTEM_PROMPT.strip() + user_prompt = POSTPROCESS_USER_PROMPT.strip() + "\n" + text + response = openai.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.7, + ) + text = 
response.choices[0].message.content + with open(output_md, "w") as f: + f.write(text) + + def run(self): + text = self.make_primary_request() + self.postprocess_markdown(text, self.output_markdown) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process arguments.") + parser.add_argument( + "-m", "--model_id", type=str, default=None, help="Ersilia Model Hub identifier" + ) + parser.add_argument( + "-i", + "--issue_number", + type=int, + default=None, + help="GitHub issue number in the ersilia-os/ersilia repository", + ) + parser.add_argument( + "-s", "--summary_path", type=str, default=None, help="File path" + ) + parser.add_argument( + "-o", + "--output_markdown", + type=str, + default=None, + required=True, + help="Output file in Markdown format", + ) + args = parser.parse_args() + model_id = args.model_id + issue_number = args.issue_number + summary_path = args.summary_path + output_markdown = args.output_markdown + md = MetadataDownloader(model_id, issue_number) + metadata_txt = md.download() + pd = PublicationSummaryDownloader(model_id, summary_path) + publication_markdown = pd.download() + ms = MetadataSuggestor(metadata_txt, publication_markdown, output_markdown) + ms.run() diff --git a/.github/scripts/summarize_publication_with_llm.py b/.github/scripts/summarize_publication_with_llm.py new file mode 100644 index 000000000..7405b8ba6 --- /dev/null +++ b/.github/scripts/summarize_publication_with_llm.py @@ -0,0 +1,409 @@ +import os +import argparse +import tempfile +import PyPDF2 +import openai +import requests +import boto3 +import shutil +import json +from typing import List +from pydantic import BaseModel +# from dotenv import load_dotenv + +# load_dotenv() + +# Authenticate into OpenAI +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +openai.api_key = OPENAI_API_KEY +MODEL_NAME = "gpt-4o" + +# Authenticate into AWS +aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID") +aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY") + 
+if aws_access_key_id and aws_secret_access_key: + boto3.setup_default_session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + ) +else: + raise Exception("AWS credentials are not set in the environment variables") + + +PRIMARY_SYSTEM_PROMPT = """ +Your are a biomedical expert. You have to make a structured report of a scientific publication. The publication will be provided as a PDF file. +Your report needs to be concise and informative. Strictly follow this structure, in Markdown format: + +# Publication details +- Title: Just copy the title of the publication +- Authors: List the authors separated by commas. For example, G. Turon, M. Duran-Frigola and D. Arora. +- Journal: Name of the journal +- Year: Year of publication +- Suggested slug: A short version of the title, with hyphens instead of spaces. For example, "deep-learning-for-malaria". The slug should be all lowercase. It cannot be longer than 50 characters and should not contain more than 5 hyphens. If there is a name for the method, for example, chemprop, try to use it in the slug. Do not add dates or names of authors. If possible, do not include words such as ml (for machine learning) or ai (for artificial intelligence). +- Suggested computational title: Suggest a title that is focused on the computational methods used in the publication. For example, "Broad-spectrum antibiotics activity prediction" or "MAIP, antimalarial activity prediction based on multiple industry datasets". + +# TLDR +Write a short summary of the publication in one or two sentences. This should be a high-level overview of the publication. +Do not use new-line characters. The TLDR should have between 100 and 200 characters. + +# Summary +Write a summary of the publication. Feel free to use the information on the Abstract, if available. +The summary should be between 100 and 200 words. Only one paragraph. No new-line characters are allowed. 
No special characters, boldface, links or references are allowed. +Use a concise style. + +# Relevance to biomedical research +Briefly discuss why the publication is relevant to biomedicine or drug discovery. +If the publication is related to a particular disease or pathogen, make sure to mention it. +If the publication is related to a particular stage of the drug discovery pipeline, make sure to mention it. +This should be between 50 and 100 words. Only one paragraph. No new-line characters. +Use a concise style. + +# Computational methods +Write a summary of the computational methods used in this publication. +If AI/ML methods are used, focus on those. Mention the main techniques and methods. +Try to explain what the input and output of the methods are. In addition, try to explain how to interpret the output and what range of values are expected and relevant (if applicable). +If training data was used, mention the size and source of the data. +If accuracy was reported, make sure to mention it. +The summary should be between 100 and 200 words. Only one paragraph. No new-line characters are allowed. No special characters, boldface, links or references are allowed. +Use a concise style. + +# Biomedical keywords +Suggest 3 to 5 keywords that are relevant to the publication. These keywords should be related to the biomedical or drug discovery aspects of the publication. +If the publication is not related to biomedicine, do not suggest any keywords. +- Keyword 1 +- Keyword 2 +- Keyword 3 + +# Computational keywords +Suggest 3 to 5 keywords that are relevant to the publication. These keywords should be related to the computational aspects of the publication, especially the AI/ML aspects, if applicable. +Do not use keywords that are too generic, such as "machine learning", "deep learning" or "descriptors". 
+- Keyword 1 +- Keyword 2 +- Keyword 3 + +# Strenghts +Discuss the strengths of the publication, especially from the perspective of the computational methods and the training dataset, if applicable. +Why are the results of the publication important or relevant? What are the main contributions of the publication? +This should be between 50 and 100 words. Only one paragraph. No new-line characters. + +# Limitations +Discuss the limitations of the publication, especially from the perspective of the computational methods and the training dataset, if applicable. +What could be improved in the publication? What are the main weaknesses? Are the computational methods novel? Are the results reliable? Are the conclusions valid? +Is the dataset large enough? Is the data of high quality? +This should be between 50 and 100 words. Only one paragraph. No new-line characters. + +# Overall relevance +Try to assess the relevance of the publication in the context of the current knowledge in the field. +In your assessment, consider the novelty of the methods, the quality of the results, and the potential impact of the publication. +The date of publication is also important. +The size of the dataset and the quality of the data are also important factors to consider. +If prior art on the topic or similar articles exist in the literature, this should penalize the relevance. Novelty is important. +Also consider the performance of the computational methods, and compare it with the performance of other methods. +The impact factor of the journal is an important factor for relevance. Higher relevance should be given to higher impact journals. +Do not be over-emphatic. Try to be explicit about the high, medium or low relevance of the publication. Not all publications are highly relevant, so do a fair assessment. +This should be between 50 and 100 words. Only one paragraph. No new-line characters. Be concise. 
+ +--- + +Below are some style guidelines for the report: +- Always use the third person and do not use personal pronouns. +- Do not start paragraphs with sentences such as "this study", "this publication", "this report" or "the authors". Directly explain the content. For example, do not say "This study develops a method for...". Instead, say "A method was developed for..." or similar. +- Do not include references or links. +- Do not include any special characters, boldface, or italics. +- Each section should be a separate paragraph (one and only one paragraph per section) or a bullet point list, as applicable. +- Do not include any information about the number of pages of the publication. +- Do not mention the figures or tables in the publication. +- Do not mention other references in the publication. +- Do not mention funding sources or acknowledgements. +- Do not begin your answer with a sentence like: "here is a structured report of the publication". Start with the report itself. +- Do not end your answer with a conclusion or a summary. The report should be self-contained. +""" + +PRIMARY_USER_PROMPT = """ +Make a structured report of the scientific publication from the PDF file. The following is the extracted text from the PDF: +""" + +POSTPROCESS_SYSTEM_PROMPT = """ +You have to standardize the format of a structured report that may contain formatting errors or might require slight modifications. +You will be provided with a text file with the report. You need to make sure that the report follows the correct format. Do not make drastic changes. +The format is as follows: + +# Publication details +- Title: Title of the publication +- Authors: List of authors. For example, G. Turon, M. Duran-Frigola and D. Arora. Always abbreviate the given names. +- Journal: Name of the journal. For example, Nature Biotechnology. +- Year: Year of publication. For example, 2024. +- Suggested slug: lowercase, maximum 5 hyphens and maxiumum 50 characters. 
No dates or author names. If possible, no generic abbreviations such as "ml" or "ai". + +# TLDR +One paragraph only. Between 100 and 200 characters. + +# Summary +One paragraph only. Between 100 and 200 words. + +# Relevance to biomedical research +One paragraph only. Between 50 and 100 words. + +# Computational methods +One paragraph only. Between 100 and 200 words. + +# Biomedical keywords +Keywords as a list and without repeats or redundancy. Minim 3 and maximum 5 keywords. For example: +- Malaria +- Plasmodium falciparum +- Asexual blood stage + +# Computational keywords +Keywords as a list and without repeats or redundancy. Minim 3 and maximum 5 keywords. For example: +- Physicochemical descriptors +- Feature selection +- Support vector machines + +# Strenghts +One paragraph only. Between 50 and 100 words. + +# Limitations +One paragraph only. Between 50 and 100 words. + +# Overall relevance +One paragraph only. Between 50 and 100 words. + +--- + +Below are a few general rules: +- Rephrase if necessary if the content is not clear or concise, but do not make big changes. Just make the small necessary adjustments to follow the guidelines. +- Do not include any special characters, boldface, or italics. +- Do not include any references or links. +- The report should be self-contained and not require additional information. +- It is a Markdown file, so make sure to follow the Markdown syntax. All sections are title sections (#) and the lists are bullet points (-). +- Avoid using double spaces or double new lines. Use only one space or one new line. +- Correct any inconsistency. For example, if the model is deemed to be highly relevant in a section, and of medium relevance elsewhere, harmonize this. Be conservative. + +Make sure that the report follows the correct format. Do not make drastic changes. 
+""" + +POSTPROCESS_USER_PROMPT = """ +Reformat, if necessary, the following publication report: +""" + +class Summary(BaseModel): + title: str + authors: str + journal: str + year: int + suggested_slug: str + suggested_computational_title: str + tldr: str + summary: str + relevance_to_biomedical_research: str + computational_methods: str + biomedical_keywords: list[str] + computational_keywords: list[str] + strengths: str + limitations: str + overall_relevance: str + + +class PublicationPDFDownloader: + def __init__(self, model_id=None, issue_number=None, url=None, file_path=None): + if ( + model_id is None + and issue_number is None + and url is None + and file_path is None + ): + raise Exception("At least one argument is necessary") + if model_id is not None: + if issue_number is not None or url is not None or file_path is not None: + raise Exception("Only one argument is accepted") + if issue_number is not None: + if model_id is not None or url is not None or file_path is not None: + raise Exception("Only one argument is accepted") + if url is not None: + if ( + model_id is not None + or issue_number is not None + or file_path is not None + ): + raise Exception("Only one argument is accepted") + if file_path is not None: + if model_id is not None or issue_number is not None or url is not None: + raise Exception("Only one argument is accepted") + self.model_id = model_id + self.issue_number = issue_number + self.url = url + self.file_path = file_path + self.tmp_folder = tempfile.mkdtemp(prefix="ersilia-") + self.output_file = os.path.join(self.tmp_folder, "publication.pdf") + + def download_by_model_id(self): + s3 = boto3.client("s3") + bucket_name = "model-publications" + key = f"{self.model_id}_publication.pdf" + try: + with open(self.output_file, "wb") as f: + s3.download_fileobj(bucket_name, key, f) + return self.output_file + except Exception as e: + raise Exception(f"Failed to download file from S3: {str(e)}") + + def download_by_issue_number(self): + # 
TODO download publication PDF from a TBD repository where publications are stored by issue number + pass + + def download_from_url(self): + response = requests.get(self.url) + if response.status_code == 200: + with open(self.output_file, "wb") as f: + f.write(response.content) + return self.output_file + else: + raise Exception( + f"Failed to download file from URL: {self.url}, status code: {response.status_code}" + ) + + def download_from_file_path(self): + shutil.copy(self.file_path, self.output_file) + return self.output_file + + def download(self): + if self.model_id is not None: + return self.download_by_model_id() + if self.issue_number is not None: + return self.download_by_issue_number() + if self.url is not None: + return self.download_from_url() + if self.file_path is not None: + return self.download_from_file_path() + return None + +class PublicationSummarizer: + def __init__(self, publication_pdf, output_file, format="json"): + self.tmp_dir = tempfile.mkdtemp(prefix="ersilia-") # TODO Unused + self.model_name = MODEL_NAME + self.publication_pdf = publication_pdf + self.output_file = output_file + self.format = format + + def extract_text_from_pdf(self, pdf_path): + with open(pdf_path, "rb") as file: + reader = PyPDF2.PdfReader(file) + text = "" + for page_num in range(len(reader.pages)): + page = reader.pages[page_num] + text += page.extract_text() + return text + + def make_primary_request(self, extracted_text): + system_prompt = PRIMARY_SYSTEM_PROMPT.strip() + user_prompt = PRIMARY_USER_PROMPT.strip() + "\n" + extracted_text + response = openai.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.7, + ) + return response.choices[0].message.content + + def postprocess_response(self, text_response, output_file): + system_prompt = POSTPROCESS_SYSTEM_PROMPT.strip() + user_prompt = POSTPROCESS_USER_PROMPT.strip() + "\n" + text_response 
+ if self.format == "json": + response = openai.beta.chat.completions.parse( + model=self.model_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.7, + response_format=Summary, + ) + with open(output_file, "w") as f: + json.dump(response.choices[0].message.parsed.model_dump(), f, indent=4) + + elif self.format == "markdown": + response = openai.chat.completions.create( + model=self.model_name, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.7, + ) + text_response = response.choices[0].message.content + with open(output_file, "w") as f: + f.write(text_response) + + else: + raise ValueError("Invalid format. Please choose either 'json' or 'markdown'.") + + def run(self): + text = self.extract_text_from_pdf(self.publication_pdf) + text_response = self.make_primary_request(text) + self.postprocess_response(text_response, self.output_file) + + +def serialize_json_to_md(json_file, md_file): + with open(json_file, "r") as f: + data = json.load(f) + with open(md_file, "w") as f: + for key, value in data.items(): + f.write(f"## {str(key).title()}\n") + if isinstance(value, list): + for item in value: + f.write(f"- {str(item)}\n") + else: + f.write(str(value) + "\n") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Process arguments.") + parser.add_argument( + "-m", "--model_id", type=str, default=None, help="Ersilia Model Hub identifier" + ) + parser.add_argument( + "-i", + "--issue_number", + type=int, + default=None, + help="GitHub issue number in the ersilia-os/ersilia repository", + ) + parser.add_argument( + "-u", "--url", type=str, default=None, help="URL of the downloadable file" + ) + parser.add_argument( + "-f", + "--file_path", + type=str, + default=None, + help="File path of the publication PDF", + ) + parser.add_argument( + "-o", + "--output_file", + type=str, + default=None, + 
required=True, + help="Output file path for the summary", + ) + parser.add_argument( + "-F", + "--format", + type=str, + default="json", + required=False, + help="File format in which to save the summary, defaults to JSON" + ) + args = parser.parse_args() + model_id = args.model_id + issue_number = args.issue_number + url = args.url + file_path = args.file_path + output_file = args.output_file + output_format = args.format + ppd = PublicationPDFDownloader(model_id, issue_number, url, file_path) + publication_pdf = ppd.download() + ps = PublicationSummarizer(publication_pdf, output_file, output_format) + ps.run() From d33dcbe51c3c109592e7c96cd884d98b9a26deb1 Mon Sep 17 00:00:00 2001 From: Dhanshree Arora Date: Mon, 30 Dec 2024 11:25:33 +0530 Subject: [PATCH 07/11] Make python api work (#1476) * Make python api work * Make python api work --- ersilia/core/model.py | 42 ++++++++++++------------------------------ test/test_models.py | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/ersilia/core/model.py b/ersilia/core/model.py index a137be80e..7831be542 100644 --- a/ersilia/core/model.py +++ b/ersilia/core/model.py @@ -4,15 +4,15 @@ import time import types import asyncio -import importlib import collections -import __main__ as main +import sys from click import secho as echo # Style-aware echo from .. import logger from ..serve.api import Api from .session import Session +from ..hub.fetch.fetch import ModelFetcher from .base import ErsiliaBase from ..lake.base import LakeBase from ..utils import tmp_pid_file @@ -128,7 +128,7 @@ def __init__( else: self.logger.set_verbosity(0) else: - if not hasattr(main, "__file__"): + if hasattr(sys, 'ps1'): self.logger.set_verbosity(0) self.save_to_lake = save_to_lake if self.save_to_lake: @@ -152,6 +152,7 @@ def __init__( self.service_class = service_class mdl = ModelBase(model) self._is_valid = mdl.is_valid() + assert self._is_valid, "The identifier {0} is not valid. 
Please visit the Ersilia Model Hub for valid identifiers".format( model ) @@ -174,13 +175,11 @@ def __init__( self.logger.debug("Unable to capture user input. Fetching anyway.") do_fetch = True if do_fetch: - fetch = importlib.import_module("ersilia.hub.fetch.fetch") - mf = fetch.ModelFetcher( + mf = ModelFetcher( config_json=self.config_json, credentials_json=self.credentials_json ) asyncio.run(mf.fetch(self.model_id)) - else: - return + self.api_schema = ApiSchema( model_id=self.model_id, config_json=self.config_json ) @@ -213,30 +212,13 @@ def __init__( def fetch(self): """ - Fetch the model if not available locally. - - This method fetches the model from the Ersilia Model Hub if it is not available locally. + This method fetches the model from the Ersilia Model Hub. """ - if not self._is_available_locally and self.fetch_if_not_available: - self.logger.info("Model is not available locally") - try: - do_fetch = yes_no_input( - "Requested model {0} is not available locally. Do you want to fetch it? [Y/n]".format( - self.model_id - ), - default_answer="Y", - ) - except: - self.logger.debug("Unable to capture user input. 
Fetching anyway.") - do_fetch = True - if do_fetch: - fetch = importlib.import_module("ersilia.hub.fetch.fetch") - mf = fetch.ModelFetcher( - config_json=self.config_json, credentials_json=self.credentials_json - ) - asyncio.run(mf.fetch(self.model_id)) - else: - return + mf = ModelFetcher( + config_json=self.config_json, credentials_json=self.credentials_json + ) + asyncio.run(mf.fetch(self.model_id)) + def __enter__(self): """ diff --git a/test/test_models.py b/test/test_models.py index 630e562bf..91e1dd490 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -87,7 +87,7 @@ def mock_run(): @patch("ersilia.core.model.ErsiliaModel") -def test_models( +def test_model_with_prior_fetching( mock_ersilia_model, mock_fetcher, mock_session, @@ -124,3 +124,42 @@ def test_models( assert mock_serve.called assert mock_run.called assert mock_close.called + + +@patch("ersilia.core.model.ErsiliaModel") +def test_model_with_no_prior_fetching( + mock_ersilia_model, + mock_fetcher, + mock_session, + mock_set_apis, + mock_convn_api_get_apis, + mock_api_task, + mock_serve, + mock_run, + mock_close, +): + MODEL_ID = MODELS[1] + INPUT = "CCCC" + + em = ErsiliaModel( + model=MODEL_ID, service_class="docker", output_source="LOCAL_ONLY" + ) + + em.fetch() + + result = em.run( + input=INPUT, + output="result.csv", + batch_size=100, + track_run=False, + try_standard=False, + ) + + em.serve() + em.close() + + assert result == RESULTS[1] + assert mock_fetcher.called + assert mock_serve.called + assert mock_run.called + assert mock_close.called From 6a2b96afac71fbc2b4a52a584fc365704e5ff7ec Mon Sep 17 00:00:00 2001 From: Miquel Duran-Frigola Date: Mon, 30 Dec 2024 12:28:48 +0100 Subject: [PATCH 08/11] update version and release date [skip ci] --- .github/scripts/airtableops.py | 2 +- .../scripts/summarize_publication_with_llm.py | 14 +- CITATION.cff | 2 +- README.md | 104 ++-- codemeta.json | 4 +- docs/conf.py | 9 +- ersilia/_static_version.py | 2 +- ersilia/cli/cmd.py | 2 +- 
ersilia/cli/commands/catalog.py | 1 + ersilia/cli/commands/close.py | 2 + ersilia/cli/commands/delete.py | 5 +- ersilia/cli/commands/example.py | 5 +- ersilia/cli/commands/fetch.py | 8 +- ersilia/cli/commands/info.py | 1 + ersilia/cli/commands/publish.py | 1 + ersilia/cli/commands/run.py | 1 + ersilia/cli/commands/serve.py | 3 +- ersilia/cli/commands/setup.py | 1 + ersilia/cli/commands/test.py | 65 +- ersilia/cli/create_cli.py | 1 + ersilia/cli/echo.py | 1 + ersilia/cli/messages.py | 4 + ersilia/core/model.py | 14 +- ersilia/core/modelbase.py | 5 +- ersilia/core/session.py | 1 + ersilia/db/environments/localdb.py | 36 +- ersilia/db/environments/managers.py | 2 +- ersilia/db/hubdata/localslugs.py | 32 +- ersilia/hub/bundle/bundle.py | 6 +- ersilia/hub/bundle/repo.py | 8 +- ersilia/hub/content/base_information.py | 555 ++++++++++++++++++ ersilia/hub/content/card.py | 490 +--------------- ersilia/hub/content/catalog.py | 6 +- ersilia/hub/content/columns/data_types.txt | 4 + .../content/columns/desired_directions.txt | 3 + ersilia/hub/content/columns_information.py | 125 ++++ ersilia/hub/content/information.py | 15 + .../hub/content/metadata/biomedical_area.txt | 3 + ersilia/hub/content/metadata/output.txt | 5 +- .../content/metadata/output_consistency.txt | 3 + .../hub/content/metadata/publication_type.txt | 3 + ersilia/hub/content/metadata/source.txt | 2 + ersilia/hub/content/metadata/source_type.txt | 3 + ersilia/hub/content/metadata/subtask.txt | 6 + .../hub/content/metadata/target_organism.txt | 3 + ersilia/hub/content/metadata/task.txt | 4 +- ersilia/hub/content/search.py | 1 + ersilia/hub/content/slug.py | 1 + ersilia/hub/delete/delete.py | 20 +- ersilia/hub/fetch/actions/get.py | 35 +- ersilia/hub/fetch/actions/inform.py | 1 + ersilia/hub/fetch/actions/lake.py | 2 + ersilia/hub/fetch/actions/sniff_bentoml.py | 4 +- ersilia/hub/fetch/actions/sniff_fastapi.py | 2 + ersilia/hub/fetch/fetch.py | 37 +- ersilia/hub/fetch/fetch_bentoml.py | 1 + 
ersilia/hub/fetch/fetch_fastapi.py | 1 + ersilia/hub/fetch/inner_template/pack.py | 2 + .../hub/fetch/inner_template/src/service.py | 6 +- ersilia/hub/fetch/lazy_fetchers/dockerhub.py | 4 +- ersilia/hub/fetch/pack/bentoml_pack/mode.py | 7 +- .../hub/fetch/pack/bentoml_pack/runners.py | 8 +- .../hub/fetch/pack/fastapi_pack/runners.py | 2 + ersilia/hub/fetch/register/register.py | 5 +- .../hub/fetch/register/standard_example.py | 1 + ersilia/hub/pull/pull.py | 1 + ersilia/io/annotated.py | 1 + ersilia/io/dataframe.py | 1 + ersilia/io/input.py | 10 +- ersilia/io/output.py | 11 +- ersilia/io/output_logger.py | 1 + ersilia/io/pure.py | 1 + ersilia/io/readers/file.py | 10 + ersilia/io/readers/pyinput.py | 1 + ersilia/io/shape.py | 4 + ersilia/io/types/compound.py | 1 + ersilia/io/types/text.py | 1 + ersilia/lake/base.py | 1 + ersilia/lake/interface.py | 1 + ersilia/lake/manager.py | 1 + ersilia/lake/s3_logger.py | 1 + ersilia/publish/deploy.py | 11 +- ersilia/publish/dockerhub.py | 1 + ersilia/publish/inspect.py | 205 +++---- ersilia/publish/publish.py | 1 + ersilia/publish/rebase.py | 17 +- ersilia/publish/s3.py | 5 +- ersilia/publish/store.py | 2 + ersilia/publish/test.py | 36 +- ersilia/serve/api.py | 2 +- ersilia/serve/services.py | 12 +- ersilia/serve/standard_api.py | 8 +- ersilia/setup/baseconda.py | 17 +- ersilia/setup/basedocker.py | 5 +- ersilia/setup/requirements/bentoml.py | 2 + ersilia/setup/requirements/compound.py | 2 + ersilia/setup/requirements/conda.py | 1 + ersilia/setup/requirements/docker.py | 1 + ersilia/setup/requirements/eospath.py | 1 + ersilia/setup/requirements/git.py | 2 + ersilia/setup/requirements/isaura.py | 1 + ersilia/setup/requirements/ping.py | 1 + ersilia/setup/utils/clone.py | 1 + .../bentoml/configuration/configparser.py | 4 +- ersilia/utils/conda.py | 32 +- ersilia/utils/config.py | 4 + ersilia/utils/csvfile.py | 1 + ersilia/utils/docker.py | 3 + ersilia/utils/download.py | 9 +- ersilia/utils/dvc.py | 3 + 
.../base_information_exceptions.py | 281 +++++++++ .../utils/exceptions_utils/card_exceptions.py | 201 +------ ersilia/utils/exceptions_utils/exceptions.py | 1 + ersilia/utils/hdf5.py | 3 + ersilia/utils/identifiers/compound.py | 1 + ersilia/utils/identifiers/file.py | 1 + ersilia/utils/identifiers/long.py | 1 + ersilia/utils/identifiers/model.py | 1 + ersilia/utils/identifiers/protein.py | 1 + ersilia/utils/identifiers/short.py | 1 + ersilia/utils/identifiers/text.py | 3 +- ersilia/utils/installers.py | 15 +- ersilia/utils/logging.py | 1 + ersilia/utils/paths.py | 2 + ersilia/utils/remove.py | 1 + ersilia/utils/session.py | 15 +- ersilia/utils/system.py | 1 + ersilia/utils/uninstall.py | 1 + ersilia/utils/upload.py | 1 + ersilia/utils/venv.py | 1 + ersilia/utils/versioning.py | 1 + ersilia/utils/zip.py | 1 + pyproject.toml | 4 +- test/cli/test_fetch.py | 5 +- test/playground/commands.py | 8 +- test/test_compound_identifier.py | 1 - test/test_url_search.py | 5 +- 137 files changed, 1650 insertions(+), 1057 deletions(-) create mode 100644 ersilia/hub/content/base_information.py create mode 100644 ersilia/hub/content/columns/data_types.txt create mode 100644 ersilia/hub/content/columns/desired_directions.txt create mode 100644 ersilia/hub/content/columns_information.py create mode 100644 ersilia/hub/content/metadata/biomedical_area.txt create mode 100644 ersilia/hub/content/metadata/output_consistency.txt create mode 100644 ersilia/hub/content/metadata/publication_type.txt create mode 100644 ersilia/hub/content/metadata/source.txt create mode 100644 ersilia/hub/content/metadata/source_type.txt create mode 100644 ersilia/hub/content/metadata/subtask.txt create mode 100644 ersilia/hub/content/metadata/target_organism.txt create mode 100644 ersilia/utils/exceptions_utils/base_information_exceptions.py diff --git a/.github/scripts/airtableops.py b/.github/scripts/airtableops.py index 718b8e322..3598715ac 100644 --- a/.github/scripts/airtableops.py +++ 
b/.github/scripts/airtableops.py @@ -121,7 +121,7 @@ def __init__(self, model_id, config_json=None): self.model_id = model_id def read_information(self): - print ("Cannot read directly from README file. Using AirTable instead") + print("Cannot read directly from README file. Using AirTable instead") am = AirtableMetadata(model_id=self.model_id) bi = am.read_information() print(bi.as_dict()) diff --git a/.github/scripts/summarize_publication_with_llm.py b/.github/scripts/summarize_publication_with_llm.py index 7405b8ba6..72ffc2e0d 100644 --- a/.github/scripts/summarize_publication_with_llm.py +++ b/.github/scripts/summarize_publication_with_llm.py @@ -9,6 +9,7 @@ import json from typing import List from pydantic import BaseModel + # from dotenv import load_dotenv # load_dotenv() @@ -187,6 +188,7 @@ Reformat, if necessary, the following publication report: """ + class Summary(BaseModel): title: str authors: str @@ -278,9 +280,10 @@ def download(self): return self.download_from_file_path() return None + class PublicationSummarizer: def __init__(self, publication_pdf, output_file, format="json"): - self.tmp_dir = tempfile.mkdtemp(prefix="ersilia-") # TODO Unused + self.tmp_dir = tempfile.mkdtemp(prefix="ersilia-") # TODO Unused self.model_name = MODEL_NAME self.publication_pdf = publication_pdf self.output_file = output_file @@ -336,9 +339,11 @@ def postprocess_response(self, text_response, output_file): text_response = response.choices[0].message.content with open(output_file, "w") as f: f.write(text_response) - + else: - raise ValueError("Invalid format. Please choose either 'json' or 'markdown'.") + raise ValueError( + "Invalid format. Please choose either 'json' or 'markdown'." 
+ ) def run(self): text = self.extract_text_from_pdf(self.publication_pdf) @@ -358,6 +363,7 @@ def serialize_json_to_md(json_file, md_file): else: f.write(str(value) + "\n") + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Process arguments.") parser.add_argument( @@ -394,7 +400,7 @@ def serialize_json_to_md(json_file, md_file): type=str, default="json", required=False, - help="File format in which to save the summary, defaults to JSON" + help="File format in which to save the summary, defaults to JSON", ) args = parser.parse_args() model_id = args.model_id diff --git a/CITATION.cff b/CITATION.cff index 5cb6bbdda..b2b4fce09 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -11,7 +11,7 @@ authors: given-names: Miquel orcid: https://orcid.org/0000-0002-9906-6936 title: 'Ersilia Model Hub: a repository of AI/ML models for neglected tropical diseases' -version: 0.1.39 +version: 0.1.40 doi: 10.5281/zenodo.7274645 date-released: '' url: https://github.com/ersilia-os/ersilia diff --git a/README.md b/README.md index 922c9d939..1c1ae9d5d 100644 --- a/README.md +++ b/README.md @@ -20,68 +20,80 @@ ## Project Description -The [Ersilia Model Hub](https://ersilia.io) is a unified platform of pre-trained AI/ML models dedicated to 🦠 infectious and neglected disease research. Our mission is to offer an open-source, 🛠 low-code solution that provides seamless access to AI/ML models for 💊 drug discovery. Models housed in our hub come from two sources: +The [Ersilia Model Hub](https://ersilia.io) is a unified platform of pre-trained AI/ML models for 🦠 infectious and neglected disease research. Our mission is to offer an open-source, 🛠 low-code solution that provides seamless access to AI/ML models for 💊 drug discovery. Models housed in our hub come from two sources: -1. 📚 Published models from literature (with due third-party acknowledgement) -2. 🛠 Custom models developed by the **Ersilia team** or our valued contributors. 
+- Published models from literature (with due third-party acknowledgement) +- Custom models developed by the Ersilia team or our valued contributors -* Read more about the project in the [Ersilia Book](https://ersilia.gitbook.io/ersilia-book/) -* Browse available models in the [Ersilia Model Hub](https://ersilia.io/model-hub/) +You can read more about the project in the [Ersilia Book](https://ersilia.gitbook.io/ersilia-book/) and browse available models in the [Ersilia Model Hub](https://ersilia.io/model-hub/). ## Quick Start Guide -Please check the package requirements in the [Installation Guide](https://ersilia.gitbook.io/ersilia-book/quick-start/installation). The next steps are a quickstart guide to installing Ersilia. +Please check the package requirements in the [Installation Guide](https://ersilia.gitbook.io/ersilia-book/quick-start/installation). The following steps are a quick start guide to using Ersilia. -1. Create a conda environment and activate it +First, create a conda environment and activate it: - ```bash - conda create -n ersilia python=3.10 - conda activate ersilia - ``` +```bash +conda create -n ersilia python=3.10 +conda activate ersilia +``` -1. Clone this repository and install with pip +Then, clone this repository and install with `pip`: - ```bash - git clone https://github.com/ersilia-os/ersilia.git - cd ersilia - pip install -e . - ``` +```bash +git clone https://github.com/ersilia-os/ersilia.git +cd ersilia +pip install -e . +``` -1. Once the Ersilia Model Hub is installed, you can use the CLI to run predictions. First, select a model from the [Ersilia Model Hub](https://ersilia.io/model-hub/) and **fetch** it: +Alternatively, you can directly install from PyPi: +```bash +pip install ersilia +``` - ```bash - ersilia fetch retrosynthetic-accessibility - ``` +Once the Ersilia package is installed, you can use the CLI to run predictions. First, select a model from the [Ersilia Model Hub](https://ersilia.io/model-hub/) and fetch it: -1. 
Generate a few (5) example molecules, to be used as input. The **example** command will generate the adequate input for the model in use +```bash +ersilia fetch eos4e40 +``` - ```bash - ersilia example retrosynthetic-accessibility -n 5 -f my_molecules.csv - ``` +Note that you can use the model identifier (eos4e40) or its human-readable slug (antibiotic-activity). -1. Then, **serve** your model: +Now you can serve the model: - ```bash - ersilia serve retrosynthetic-accessibility - ``` +```bash +ersilia serve eos4e40 +``` -1. And **run** the model: +To view some information of the model, type the following: - ```bash - ersilia run -i my_molecules.csv -o my_predictions.csv - ``` +```bash +ersilia info +``` -1. Finally, **close** the service when you are done. +The simplest way to run a model is by passing a CSV file as input. If you don't have one, you can generate it easily. In this case, we take 5 molecules as an example: - ```bash - ersilia close - ``` +```bash +ersilia example -n 5 -f my_input.csv +``` -1. If you no longer want to use the model, you can **delete** it. +Now you can run the model: - ```bash - ersilia delete retrosynthetic-accessibility - ``` +```bash +ersilia run -i my_input.csv -o my_output.csv +``` + +To stop the service, you can simply close the model: + +```bash +ersilia close +``` + +Finally, if you don't want to use the model anymore, delete it as follows: + +```bash +ersilia delete eos4e40 +``` Please see the [Ersilia Book](https://ersilia.gitbook.io/ersilia-book/) for more examples and detailed explanations. @@ -89,8 +101,8 @@ Please see the [Ersilia Book](https://ersilia.gitbook.io/ersilia-book/) for more The Ersilia Model Hub is a Free, Open Source Software and we highly value new contributors. There are several ways in which you can contribute to the project: -* A good place to start is checking open [issues](https://github.com/ersilia-os/ersilia/issues). 
-* If you have identified a bug in the code, please open a new issue using the bug template. +* A good place to start is checking open [issues](https://github.com/ersilia-os/ersilia/issues) +* If you have identified a bug in the code, please open a new issue using the bug template * Share any feedback with the community using [GitHub Discussions](https://github.com/ersilia-os/ersilia/discussions) for the project * Check our [Contributing Guide](https://github.com/ersilia-os/ersilia/blob/master/CONTRIBUTING.md) for more details @@ -100,7 +112,7 @@ The Ersilia Open Source Initiative adheres to the [Contributor Covenant](https:/ If you want to incorporate a new model in the platform, open a new issue using the [model request template](https://github.com/ersilia-os/ersilia/issues/new?assignees=&labels=new-model&template=model_request.yml&title=%F0%9F%A6%A0+Model+Request%3A+%3Cname%3E) or contact us using the following [form](https://www.ersilia.io/request-model). -After submitting your model request via an issue (suggested), a maintainer will review your request. If they `/approve` your request, a new model respository will be created for you to fork and use! There is a [demo repository](https://github.com/ersilia-os/eos-demo) explaining the steps one-by-one. +After submitting your model request via an issue (suggested), an Ersilia maintainer will review your request. If they approve your request, a new model respository will be created for you to fork and use! There is a [demo repository](https://github.com/ersilia-os/eos-demo) explaining the steps one-by-one. ## License and Citation @@ -113,6 +125,7 @@ Please note that Ersilia distinguises between software contributors and software - Carolina Caballero ### Cited by + The Ersilia Model Hub is used in a number of scientific projects. 
Read more about how we are implementing it in: - [Turon, Hlozek et al, Nat Commun, 2023](https://www.nature.com/articles/s41467-023-41512-2) - [Van Heerden et al, ACS Omega, 2023](https://pubs.acs.org/doi/10.1021/acsomega.3c05664) @@ -125,4 +138,5 @@ The [Ersilia Open Source Initiative](https://ersilia.io) is a Non Profit Organiz [Help us](https://www.ersilia.io/donate) achieve our mission! ### Funding -The Ersilia Model Hub is the flagship product of Ersilia. It has been funded thanks to a combination of funding sources. Full disclosure can be found in our [website](https://ersilia.io/supporters). Highlighted supporters include Splunk Pledge, the Mozilla Builders Accelerator and the AI2050 Program by Schmidt Futures. + +The Ersilia Model Hub is the flagship product of Ersilia. It has been funded thanks to a combination of funding sources. Full disclosure can be found in our [website](https://ersilia.io/supporters). Highlighted supporters include the Mozilla Builders Accelerator, Fast Forward, Splunk Pledge and the AI2050 Program by Schmidt Sciences. 
diff --git a/codemeta.json b/codemeta.json index 9cf7bc1e8..0e2f6d7c9 100644 --- a/codemeta.json +++ b/codemeta.json @@ -28,7 +28,7 @@ "givenName": "Miquel" } ], - "codeRepository": "https://github.com/ersilia-os/ersilia/v0.1.39", + "codeRepository": "https://github.com/ersilia-os/ersilia/v0.1.40", "dateCreated": "2021-01-01", "dateModified": "2024-10-01", "datePublished": "2022-10-06", @@ -221,7 +221,7 @@ ], "url": "https://ersilia.io", "downloadUrl": "https://github.com/ersilia-os/ersilia/archive/refs/tags/v0.1.37.tar.gz", - "version": "0.1.39", + "version": "0.1.40", "relatedLink": "https://ersilia.gitbook.io", "developmentStatus": "active", "issueTracker": "https://github.com/ersilia-os/ersilia/issues" diff --git a/docs/conf.py b/docs/conf.py index 3256b2ddd..687122d4a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,7 +13,7 @@ "sphinx.ext.mathjax", "sphinx.ext.viewcode", "sphinx.ext.napoleon", - 'sphinx.ext.autosummary' + "sphinx.ext.autosummary", ] autosummary_generate = True @@ -26,15 +26,12 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] html_theme = "furo" -html_theme_options = { - "collapse_navigation": True, - "navigation_depth": 4 -} +html_theme_options = {"collapse_navigation": True, "navigation_depth": 4} html_static_path = [] htmlhelp_basename = "ersilia_doc" -pygments_style = 'sphinx' +pygments_style = "sphinx" latex_documents = [ (master_doc, "ersilia.tex", "Ersilia Documentation", author, "manual"), diff --git a/ersilia/_static_version.py b/ersilia/_static_version.py index 43dcc4d90..7624855a8 100644 --- a/ersilia/_static_version.py +++ b/ersilia/_static_version.py @@ -1 +1 @@ -version = "0.1.39" +version = "0.1.40" diff --git a/ersilia/cli/cmd.py b/ersilia/cli/cmd.py index 0d76bba12..85052d689 100644 --- a/ersilia/cli/cmd.py +++ b/ersilia/cli/cmd.py @@ -59,4 +59,4 @@ def setup(self): def test(self): m = importlib.import_module("ersilia.cli.commands.test") - m.test_cmd() \ No newline at end of file + m.test_cmd() diff --git 
a/ersilia/cli/commands/catalog.py b/ersilia/cli/commands/catalog.py index 25434eb7e..04a9c8717 100644 --- a/ersilia/cli/commands/catalog.py +++ b/ersilia/cli/commands/catalog.py @@ -25,6 +25,7 @@ def catalog_cmd(): Display model card for a specific model ID and show catalog in json format: $ ersilia catalog --card --as-json """ + # Example usage: ersilia catalog @ersilia_cli.command(help="List a catalog of models") @click.option( diff --git a/ersilia/cli/commands/close.py b/ersilia/cli/commands/close.py index 584bfef56..79b2be96e 100644 --- a/ersilia/cli/commands/close.py +++ b/ersilia/cli/commands/close.py @@ -6,6 +6,7 @@ from ...core.session import Session from ...utils.session import deregister_model_session + def close_cmd(): """ Closes the current session. @@ -24,6 +25,7 @@ def close_cmd(): Close the current session: $ ersilia close """ + # Example usage: ersilia close {MODEL} @ersilia_cli.command(short_help="Close model", help="Close model") def close(): diff --git a/ersilia/cli/commands/delete.py b/ersilia/cli/commands/delete.py index a52593d8e..2d729eff8 100644 --- a/ersilia/cli/commands/delete.py +++ b/ersilia/cli/commands/delete.py @@ -31,6 +31,7 @@ def delete_cmd(): Delete all models: $ ersilia delete --all """ + def _delete(md, model_id): md.delete(model_id) @@ -46,9 +47,7 @@ def _delete_model_by_id(model_id): ) else: echo( - f":person_tipping_hand: {reason}".format( - model_id - ), + f":person_tipping_hand: {reason}".format(model_id), fg="yellow", ) diff --git a/ersilia/cli/commands/example.py b/ersilia/cli/commands/example.py index 328856556..2377c6689 100644 --- a/ersilia/cli/commands/example.py +++ b/ersilia/cli/commands/example.py @@ -29,7 +29,8 @@ def example(model, n_samples, file_name, simple, predefined): model_id = session.current_model_id() if not model_id: echo( - "No model found. Please specify a model or serve a model in the current shell.", fg="red" + "No model found. 
Please specify a model or serve a model in the current shell.", + fg="red", ) return eg = ExampleGenerator(model_id=model_id) @@ -41,4 +42,4 @@ def example(model, n_samples, file_name, simple, predefined): ) ) else: - eg.example(n_samples, file_name, simple, try_predefined=predefined) \ No newline at end of file + eg.example(n_samples, file_name, simple, try_predefined=predefined) diff --git a/ersilia/cli/commands/fetch.py b/ersilia/cli/commands/fetch.py index 797ae28af..292ac2a5f 100644 --- a/ersilia/cli/commands/fetch.py +++ b/ersilia/cli/commands/fetch.py @@ -30,6 +30,7 @@ def fetch_cmd(): Fetch a model from a local directory: $ ersilia fetch --from_dir """ + def _fetch(mf, model_id): res = asyncio.run(mf.fetch(model_id)) return res @@ -78,7 +79,7 @@ def _fetch(mf, model_id): "--hosted_url", default=None, type=click.STRING, - help="URL of the hosted model service" + help="URL of the hosted model service", ) @click.option( "--with_bentoml", @@ -136,6 +137,9 @@ def fetch( fg="green", ) else: - echo(f":thumbs_down: Model {model_id} failed to fetch! {fetch_result.reason}", fg="red") + echo( + f":thumbs_down: Model {model_id} failed to fetch! 
{fetch_result.reason}", + fg="red", + ) return fetch diff --git a/ersilia/cli/commands/info.py b/ersilia/cli/commands/info.py index 216ae8fec..373605625 100644 --- a/ersilia/cli/commands/info.py +++ b/ersilia/cli/commands/info.py @@ -25,6 +25,7 @@ def info_cmd(): Get information about active session as json: $ ersilia info --as_json """ + @ersilia_cli.command( short_help="Get model information", help="Get model information" ) diff --git a/ersilia/cli/commands/publish.py b/ersilia/cli/commands/publish.py index d5e173d5d..1edcbda76 100644 --- a/ersilia/cli/commands/publish.py +++ b/ersilia/cli/commands/publish.py @@ -28,6 +28,7 @@ def publish_cmd(): Rebase a model: $ ersilia publish rebase """ + def _publish(mf, model_id): mf.publish(model_id) diff --git a/ersilia/cli/commands/run.py b/ersilia/cli/commands/run.py index f515092bd..606244610 100644 --- a/ersilia/cli/commands/run.py +++ b/ersilia/cli/commands/run.py @@ -32,6 +32,7 @@ def run_cmd(): Run a model with batch size: $ ersilia run -i -b 50 """ + # Example usage: ersilia run -i {INPUT} [-o {OUTPUT} -b {BATCH_SIZE}] @ersilia_cli.command(short_help="Run a served model", help="Run a served model") @click.option("-i", "--input", "input", required=True, type=click.STRING) diff --git a/ersilia/cli/commands/serve.py b/ersilia/cli/commands/serve.py index be6a5af03..2d106be35 100644 --- a/ersilia/cli/commands/serve.py +++ b/ersilia/cli/commands/serve.py @@ -29,6 +29,7 @@ def serve_cmd(): Serve a model and track the session: $ ersilia serve --track """ + # Example usage: ersilia serve {MODEL} @ersilia_cli.command(short_help="Serve model", help="Serve model") @click.argument("model", type=click.STRING) @@ -77,7 +78,7 @@ def serve(model, output_source, lake, port, track): if mdl.url is None: echo("No URL found. 
Service unsuccessful.", fg="red") return - + register_model_session(mdl.model_id, mdl.session._session_dir) echo( ":rocket: Serving model {0}: {1}".format(mdl.model_id, mdl.slug), fg="green" diff --git a/ersilia/cli/commands/setup.py b/ersilia/cli/commands/setup.py index 27f3a237a..0317fb6be 100644 --- a/ersilia/cli/commands/setup.py +++ b/ersilia/cli/commands/setup.py @@ -25,6 +25,7 @@ def setup_cmd(): Set up the environment with base installation: $ ersilia setup --base """ + # Example usage: ersilia setup @ersilia_cli.command( short_help="Setup ersilia", diff --git a/ersilia/cli/commands/test.py b/ersilia/cli/commands/test.py index 011937cc1..73bca0208 100644 --- a/ersilia/cli/commands/test.py +++ b/ersilia/cli/commands/test.py @@ -3,6 +3,7 @@ from . import ersilia_cli from ...publish.test import ModelTester + def test_cmd(): """ Test a model and obtain performance metrics. @@ -24,66 +25,58 @@ def test_cmd(): With deep testing level and inspect: $ ersilia test my_model -d /path/to/model --level deep --inspect --remote """ + @ersilia_cli.command( short_help="Test a model", - help= - """ + help=""" Test a local models that are under development as well as on deployment and obtain a detailed report on its expected behavior and performance - """ - , + """, ) @click.argument("model", type=click.STRING) @click.option( - "-l", - "--level", - "level", + "-l", + "--level", + "level", help="Level of testing, None: for default, deep: for deep testing", - required=False, - default=None, - type=click.STRING + required=False, + default=None, + type=click.STRING, ) @click.option( - "-d", - "--dir", - "dir", + "-d", + "--dir", + "dir", help="Model directory", - required=False, - default=None, - type=click.STRING + required=False, + default=None, + type=click.STRING, ) @click.option( - "--inspect", - help="Inspect the model: More on the docs", - is_flag=True, - default=False + "--inspect", + help="Inspect the model: More on the docs", + is_flag=True, + default=False, ) 
@click.option( "--remote", - help="Test the model from remote git repository", - is_flag=True, - default=False + help="Test the model from remote git repository", + is_flag=True, + default=False, ) @click.option( "--remove", - help="Remove the model directory after testing", - is_flag=True, - default=False + help="Remove the model directory after testing", + is_flag=True, + default=False, ) - def test( - model, - level, - dir, - inspect, - remote, - remove - ): + def test(model, level, dir, inspect, remote, remove): mt = ModelTester( model_id=model, - level=level, + level=level, dir=dir, inspect=inspect, remote=remote, - remove=remove + remove=remove, ) echo("Setting up model tester...") mt.setup() diff --git a/ersilia/cli/create_cli.py b/ersilia/cli/create_cli.py index 47874ef7b..0bf2e1779 100644 --- a/ersilia/cli/create_cli.py +++ b/ersilia/cli/create_cli.py @@ -2,6 +2,7 @@ from .cmd import Command from .commands import ersilia_cli + def create_ersilia_cli(): """ Creates and configures the Ersilia CLI. diff --git a/ersilia/cli/echo.py b/ersilia/cli/echo.py index ce13afe43..61febb6af 100644 --- a/ersilia/cli/echo.py +++ b/ersilia/cli/echo.py @@ -27,6 +27,7 @@ class Silencer(object): silence() Disables CLI output. """ + def __init__(self): self.silence_file = os.path.join(get_session_dir(), SILENCE_FILE) if not os.path.exists(self.silence_file): diff --git a/ersilia/cli/messages.py b/ersilia/cli/messages.py index 9f97d1e69..f6428e5fe 100644 --- a/ersilia/cli/messages.py +++ b/ersilia/cli/messages.py @@ -2,6 +2,7 @@ from ..default import ERSILIA_MODEL_HUB_URL import sys + class ModelNotFound(object): """ A class to handle the scenario when a model is not found. @@ -16,6 +17,7 @@ class ModelNotFound(object): echo() Prints an error message and exits the program. """ + def __init__(self, model): self.model = model @@ -36,6 +38,7 @@ def echo(self): ) sys.exit(0) + class ModelNotInLocal(object): """ A class to handle the scenario when a model is not found locally. 
@@ -50,6 +53,7 @@ class ModelNotInLocal(object): echo() Prints an error message and exits the program. """ + def __init__(self, model_id): self.model_id = model_id diff --git a/ersilia/core/model.py b/ersilia/core/model.py index 7831be542..6be80b310 100644 --- a/ersilia/core/model.py +++ b/ersilia/core/model.py @@ -7,7 +7,7 @@ import collections import sys -from click import secho as echo # Style-aware echo +from click import secho as echo # Style-aware echo from .. import logger from ..serve.api import Api @@ -105,6 +105,7 @@ class ErsiliaModel(ErsiliaBase): model = ErsiliaModel(model="model_id") model.close() """ + def __init__( self, model: str, @@ -128,7 +129,7 @@ def __init__( else: self.logger.set_verbosity(0) else: - if hasattr(sys, 'ps1'): + if hasattr(sys, "ps1"): self.logger.set_verbosity(0) self.save_to_lake = save_to_lake if self.save_to_lake: @@ -153,7 +154,9 @@ def __init__( mdl = ModelBase(model) self._is_valid = mdl.is_valid() - assert self._is_valid, "The identifier {0} is not valid. Please visit the Ersilia Model Hub for valid identifiers".format( + assert ( + self._is_valid + ), "The identifier {0} is not valid. Please visit the Ersilia Model Hub for valid identifiers".format( model ) self.config_json = config_json @@ -219,7 +222,6 @@ def fetch(self): ) asyncio.run(mf.fetch(self.model_id)) - def __enter__(self): """ Enter the runtime context related to this object. 
@@ -312,7 +314,7 @@ def _get_url(self): def _get_api_instance(self, api_name): url = self._get_url() - if (api_name is None): + if api_name is None: api_names = self.autoservice.get_apis() assert ( len(api_names) == 1 @@ -782,7 +784,7 @@ def run( result = None standard_status_ok = False self.logger.debug("We will try conventional run.") - + if not standard_status_ok: self.logger.debug("Trying conventional run") result = self._run( diff --git a/ersilia/core/modelbase.py b/ersilia/core/modelbase.py index 90a3541d5..04bdf3add 100644 --- a/ersilia/core/modelbase.py +++ b/ersilia/core/modelbase.py @@ -26,6 +26,7 @@ class ModelBase(ErsiliaBase): config_json : dict, optional Configuration in JSON format, by default None. """ + @throw_ersilia_exception() def __init__(self, model_id_or_slug=None, repo_path=None, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) @@ -51,7 +52,9 @@ def __init__(self, model_id_or_slug=None, repo_path=None, config_json=None): self.logger.debug(f"Absolute path: {abspath}") # Check if path actually exists if not os.path.exists(abspath): - raise FileNotFoundError("Model directory does not exist at the provided path. Please check the path and try again.") + raise FileNotFoundError( + "Model directory does not exist at the provided path. Please check the path and try again." + ) self.text = self._get_model_id_from_path(repo_path) self.model_id = self.text slug = self._get_slug_if_available(repo_path) diff --git a/ersilia/core/session.py b/ersilia/core/session.py index 2d0e14011..95cf0f695 100644 --- a/ersilia/core/session.py +++ b/ersilia/core/session.py @@ -24,6 +24,7 @@ class Session(ErsiliaBase): config_json : dict Configuration in JSON format. 
""" + def __init__(self, config_json): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self._session_dir = get_session_dir() diff --git a/ersilia/db/environments/localdb.py b/ersilia/db/environments/localdb.py index 09d672cba..6eac1359b 100644 --- a/ersilia/db/environments/localdb.py +++ b/ersilia/db/environments/localdb.py @@ -74,7 +74,9 @@ def create_table(self): env text, PRIMARY KEY (model_id, env) ); - """.format(self._table) + """.format( + self._table + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -106,7 +108,9 @@ def insert(self, model_id, env): return sql = """ INSERT OR IGNORE INTO {0} (model_id, env) VALUES ('{1}', '{2}') - """.format(self._table, model_id, env) + """.format( + self._table, model_id, env + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -129,7 +133,9 @@ def delete(self, model_id, env): sql = """ DELETE FROM {0} WHERE model_id = '{1}' AND env = '{2}' - """.format(self._table, model_id, env) + """.format( + self._table, model_id, env + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -155,7 +161,9 @@ def envs_of_model(self, model_id): sql = """ SELECT env FROM {0} WHERE model_id = '{1}' - """.format(self._table, model_id) + """.format( + self._table, model_id + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -182,7 +190,9 @@ def models_of_env(self, env): sql = """ SELECT model_id FROM {0} WHERE env = '{1}' - """.format(self._table, env) + """.format( + self._table, env + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -209,7 +219,9 @@ def models_with_same_env(self, model_id): sql = """ SELECT model_id FROM {0} WHERE env IN (SELECT env FROM {0} WHERE model_id = '{1}') - """.format(self._table, model_id) + """.format( + self._table, model_id + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -236,7 +248,9 @@ def envs_with_same_model(self, env): sql = """ SELECT env FROM {0} WHERE model_id IN (SELECT model_id FROM {0} WHERE env = '{1}') - 
""".format(self._table, env) + """.format( + self._table, env + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -257,7 +271,9 @@ def fetchall(self): return sql = """ SELECT * FROM {0} - """.format(self._table) + """.format( + self._table + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -273,7 +289,9 @@ def clean(self): return sql = """ DELETE FROM {0} - """.format(self._table) + """.format( + self._table + ) conn = self._connect() c = conn.cursor() c.execute(sql) diff --git a/ersilia/db/environments/managers.py b/ersilia/db/environments/managers.py index 91b268bb2..7dd41c85c 100644 --- a/ersilia/db/environments/managers.py +++ b/ersilia/db/environments/managers.py @@ -27,7 +27,7 @@ class DockerManager(ErsiliaBase): Manages Docker operations for Ersilia models. It provides methods to build, run, and manage Docker images and containers - associated with Ersilia models. + associated with Ersilia models. Parameters ---------- diff --git a/ersilia/db/hubdata/localslugs.py b/ersilia/db/hubdata/localslugs.py index ac54c93d1..64ba73de8 100644 --- a/ersilia/db/hubdata/localslugs.py +++ b/ersilia/db/hubdata/localslugs.py @@ -33,7 +33,9 @@ def create_table(self): slug text, PRIMARY KEY (model_id, slug) ); - """.format(self._table) + """.format( + self._table + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -55,7 +57,9 @@ def insert(self, model_id, slug): return sql = """ INSERT OR IGNORE INTO {0} (model_id, slug) VALUES ('{1}', '{2}') - """.format(self._table, model_id, slug) + """.format( + self._table, model_id, slug + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -76,7 +80,9 @@ def delete_by_model_id(self, model_id): sql = """ DELETE FROM {0} WHERE model_id = '{1}' - """.format(self._table, model_id) + """.format( + self._table, model_id + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -97,7 +103,9 @@ def delete_by_slug(self, slug): sql = """ DELETE FROM {0} WHERE slug = '{1}' - """.format(self._table, 
slug) + """.format( + self._table, slug + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -120,7 +128,9 @@ def delete(self, model_id, slug): sql = """ DELETE FROM {0} WHERE model_id = '{1}' AND slug = '{2}' - """.format(self._table, model_id, slug) + """.format( + self._table, model_id, slug + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -144,7 +154,9 @@ def models_of_slug(self, slug): sql = """ SELECT model_id FROM {0} WHERE slug = '{1}' - """.format(self._table, slug) + """.format( + self._table, slug + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -169,7 +181,9 @@ def slugs_of_model(self, model_id): sql = """ SELECT slug FROM {0} WHERE model_id = '{1}' - """.format(self._table, model_id) + """.format( + self._table, model_id + ) conn = self._connect() c = conn.cursor() c.execute(sql) @@ -185,7 +199,9 @@ def clean(self): return sql = """ DELETE FROM {0} - """.format(self._table) + """.format( + self._table + ) conn = self._connect() c = conn.cursor() c.execute(sql) diff --git a/ersilia/hub/bundle/bundle.py b/ersilia/hub/bundle/bundle.py index c540ad112..8575367b1 100644 --- a/ersilia/hub/bundle/bundle.py +++ b/ersilia/hub/bundle/bundle.py @@ -105,7 +105,7 @@ def add_model_install_commands(self): with open(f0, "w") as f: yaml.safe_dump(data, f, sort_keys=False) - def check(self): # TODO: Removing this fucntion + def check(self): # TODO: Removing this fucntion """ Check if the environment file exists. @@ -159,7 +159,7 @@ def add_model_install_commands(self): for l in reqs: f.write(l + os.linesep) - def check(self): # TODO: Removing this fucntion + def check(self): # TODO: Removing this fucntion """ Check if the requirements file exists. @@ -243,7 +243,7 @@ def set_to_full(self): with open(self.path, "w") as f: f.write(content) - def check(self): # TODO: Removing this fucntion + def check(self): # TODO: Removing this fucntion """ Check if the Dockerfile exists. 
import os
import datetime
import validators

try:
    from validators import ValidationFailure
except ImportError:
    from validators import ValidationError as ValidationFailure


from ...utils.exceptions_utils.base_information_exceptions import (
    SlugBaseInformationError,
    IdentifierBaseInformationError,
    StatusBaseInformationError,
    TitleBaseInformationError,
    DescriptionBaseInformationError,
    ModeBaseInformationError,
    SourceBaseInformationError,
    SourceTypeBaseInformationError,
    InputBaseInformationError,
    InputShapeBaseInformationError,
    OutputBaseInformationError,
    OutputTypeBaseInformationError,
    OutputShapeBaseInformationError,
    OutputDimensionBaseInformationError,
    OutputConsistencyBaseInformationError,
    TaskBaseInformationError,
    SubtaskBaseInformationError,
    BiomedicalAreaBaseInformationError,
    TargetOrganismBaseInformationError,
    TagBaseInformationError,
    PublicationBaseInformationError,
    PublicationTypeBaseInformationError,
    PublicationYearBaseInformationError,
    SourceCodeBaseInformationError,
    LicenseBaseInformationError,
    GithubBaseInformationError,
    DockerhubBaseInformationError,
    DockerArchitectureBaseInformationError,
    S3BaseInformationError,
    BothIdentifiersBaseInformationError,
    MemoryGbBaseInformationError,
)
from ...utils.identifiers.model import ModelIdentifier
from ... import ErsiliaBase


class BaseInformation(ErsiliaBase):
    """
    Class to handle the base information of a model card.

    Each metadata field is exposed as a property. Most setters validate the
    new value (against the vocabulary files in the ``metadata`` folder next to
    this module, against length limits, or against URL prefixes) and raise the
    matching ``*BaseInformationError`` when the value is rejected.

    Parameters
    ----------
    config_json : dict, optional
        Configuration settings in JSON format, by default None.
    """

    # (property name, dictionary key) pairs, in the order ``from_dict`` loads
    # them. "title" precedes "description" because the description setter
    # compares the new value with the stored title.
    _DICT_FIELDS = (
        ("identifier", "Identifier"),
        ("slug", "Slug"),
        ("status", "Status"),
        ("title", "Title"),
        ("description", "Description"),
        ("mode", "Mode"),
        ("source", "Source"),
        ("source_type", "Source Type"),
        ("input", "Input"),
        ("input_shape", "Input Shape"),
        ("task", "Task"),
        ("subtask", "Subtask"),
        ("biomedical_area", "Biomedical Area"),
        ("target_organism", "Target Organism"),
        ("output", "Output"),
        ("output_type", "Output Type"),
        ("output_shape", "Output Shape"),
        ("output_dimension", "Output Dimension"),
        ("output_consistency", "Output Consistency"),
        ("interpretation", "Interpretation"),
        ("tag", "Tag"),
        ("publication", "Publication"),
        ("publication_type", "Publication Type"),
        ("publication_year", "Publication Year"),
        ("source_code", "Source Code"),
        ("license", "License"),
        ("contributor", "Contributor"),
        ("dockerhub", "DockerHub"),
        ("docker_architecture", "Docker Architecture"),
        ("s3", "S3"),
        ("memory_gb", "Memory Gb"),
    )

    def __init__(self, config_json=None):
        ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None)
        # Every backing attribute read by a property getter must exist, so that
        # as_dict() and from_dict() work on a freshly created instance.
        self._github = None
        self._identifier = None
        self._slug = None
        self._status = None
        self._title = None
        self._description = None
        self._mode = None
        self._source = None
        self._source_type = None
        self._task = None
        self._subtask = None
        self._biomedical_area = None
        self._target_organism = None
        self._input = None
        self._input_shape = None
        self._output = None
        self._output_type = None
        self._output_shape = None
        self._output_dimension = None
        self._output_consistency = None
        self._interpretation = None
        self._tag = None
        self._publication = None
        self._publication_type = None
        self._publication_year = None
        self._source_code = None
        self._license = None
        self._date = None
        self._contributor = None
        self._dockerhub = None
        self._docker_architecture = None
        self._s3 = None
        self._both_identifiers = None
        self._memory_gb = None

    def _is_valid_url(self, url_string: str) -> bool:
        """Return False when ``validators.url`` reports a failure, else its result."""
        outcome = validators.url(url_string)
        if isinstance(outcome, ValidationFailure):
            return False
        return outcome

    def _read_default_fields(self, field):
        """
        Read the accepted values of a metadata field.

        Parameters
        ----------
        field : str
            Field name, e.g. ``"Output Type"``. It is lower-cased and spaces
            become underscores to build ``metadata/<name>.txt`` next to this
            module.

        Returns
        -------
        list of str
            The file content split on newlines.
        """
        root = os.path.dirname(os.path.abspath(__file__))
        filename = field.lower().replace(" ", "_")
        file_path = os.path.join(root, "metadata", filename + ".txt")
        with open(file_path, "r") as f:
            valid_field = f.read().split("\n")
        return valid_field

    def _check_choice(self, value, field, error):
        """Return ``value`` if it is listed for ``field``; otherwise raise ``error``."""
        if value not in self._read_default_fields(field):
            raise error
        return value

    def _check_choices(self, values, field, error, require_list=True):
        """
        Validate one or several values against the vocabulary of ``field``.

        A single string is wrapped into a one-element list. When
        ``require_list`` is true anything that is not a list is rejected.
        The vocabulary file is read once, not once per element.

        Returns
        -------
        list
            The (possibly wrapped) values.
        """
        if isinstance(values, str):
            values = [values]
        if require_list and not isinstance(values, list):
            raise error
        allowed = self._read_default_fields(field)
        for value in values:
            if value not in allowed:
                raise error
        return values

    @property
    def identifier(self):
        """Model identifier; the setter requires ``ModelIdentifier().is_valid``."""
        return self._identifier

    @identifier.setter
    def identifier(self, new_identifier):
        mi = ModelIdentifier()
        if not mi.is_valid(new_identifier):
            raise IdentifierBaseInformationError
        self._identifier = new_identifier

    @property
    def slug(self):
        """Model slug; must be lower case and 5 to 60 characters long."""
        return self._slug

    @slug.setter
    def slug(self, new_slug):
        if new_slug.lower() != new_slug or not 5 <= len(new_slug) <= 60:
            raise SlugBaseInformationError
        self._slug = new_slug

    @property
    def status(self):
        """Model status; must be listed in ``metadata/status.txt``."""
        return self._status

    @status.setter
    def status(self, new_status):
        self._status = self._check_choice(
            new_status, "Status", StatusBaseInformationError
        )

    @property
    def title(self):
        """Model title; must be 10 to 300 characters long."""
        return self._title

    @title.setter
    def title(self, new_title):
        if not 10 <= len(new_title) <= 300:
            raise TitleBaseInformationError
        self._title = new_title

    @property
    def description(self):
        """Model description; at least 200 characters and different from the title."""
        return self._description

    @description.setter
    def description(self, new_description):
        if len(new_description) < 200 or new_description == self._title:
            raise DescriptionBaseInformationError
        self._description = new_description

    @property
    def mode(self):
        """Model mode; must be listed in ``metadata/mode.txt``."""
        return self._mode

    @mode.setter
    def mode(self, new_mode):
        self._mode = self._check_choice(new_mode, "Mode", ModeBaseInformationError)

    @property
    def source(self):
        """Model source; must be listed in ``metadata/source.txt``."""
        return self._source

    @source.setter
    def source(self, new_source):
        self._source = self._check_choice(
            new_source, "Source", SourceBaseInformationError
        )

    @property
    def source_type(self):
        """Source type; must be listed in ``metadata/source_type.txt``."""
        return self._source_type

    @source_type.setter
    def source_type(self, new_source_type):
        self._source_type = self._check_choice(
            new_source_type, "Source Type", SourceTypeBaseInformationError
        )

    @property
    def input(self):
        """Input kinds (str or list); each must be listed in ``metadata/input.txt``."""
        return self._input

    @input.setter
    def input(self, new_input):
        self._input = self._check_choices(
            new_input, "Input", InputBaseInformationError
        )

    @property
    def input_shape(self):
        """Input shape; must be listed in ``metadata/input_shape.txt``."""
        return self._input_shape

    @input_shape.setter
    def input_shape(self, new_input_shape):
        self._input_shape = self._check_choice(
            new_input_shape, "Input Shape", InputShapeBaseInformationError
        )

    @property
    def task(self):
        """Tasks (str or list); each must be listed in ``metadata/task.txt``."""
        return self._task

    @task.setter
    def task(self, new_task):
        self._task = self._check_choices(new_task, "Task", TaskBaseInformationError)

    @property
    def subtask(self):
        """Subtasks (str or list); each must be listed in ``metadata/subtask.txt``."""
        return self._subtask

    @subtask.setter
    def subtask(self, new_subtask):
        self._subtask = self._check_choices(
            new_subtask, "Subtask", SubtaskBaseInformationError
        )

    @property
    def biomedical_area(self):
        """Biomedical areas (str or list) from ``metadata/biomedical_area.txt``."""
        return self._biomedical_area

    @biomedical_area.setter
    def biomedical_area(self, new_biomedical_area):
        self._biomedical_area = self._check_choices(
            new_biomedical_area, "Biomedical Area", BiomedicalAreaBaseInformationError
        )

    @property
    def target_organism(self):
        """Target organisms (str or list) from ``metadata/target_organism.txt``."""
        return self._target_organism

    @target_organism.setter
    def target_organism(self, new_target_organism):
        self._target_organism = self._check_choices(
            new_target_organism, "Target Organism", TargetOrganismBaseInformationError
        )

    @property
    def output(self):
        """Output kinds (str or iterable); each must be in ``metadata/output.txt``."""
        return self._output

    @output.setter
    def output(self, new_output):
        # As before, no list check here: any iterable of valid values passes.
        self._output = self._check_choices(
            new_output, "Output", OutputBaseInformationError, require_list=False
        )

    @property
    def output_type(self):
        """Output types (str or iterable) from ``metadata/output_type.txt``."""
        return self._output_type

    @output_type.setter
    def output_type(self, new_output_type):
        self._output_type = self._check_choices(
            new_output_type,
            "Output Type",
            OutputTypeBaseInformationError,
            require_list=False,
        )

    @property
    def output_shape(self):
        """Output shape; must be listed in ``metadata/output_shape.txt``."""
        return self._output_shape

    @output_shape.setter
    def output_shape(self, new_output_shape):
        self._output_shape = self._check_choice(
            new_output_shape, "Output Shape", OutputShapeBaseInformationError
        )

    @property
    def output_dimension(self):
        """Output dimension; must be an ``int`` greater than or equal to 1."""
        return self._output_dimension

    @output_dimension.setter
    def output_dimension(self, new_output_dimension):
        # Exact type check on purpose: bool is a subclass of int and is rejected.
        if type(new_output_dimension) is not int or new_output_dimension < 1:
            raise OutputDimensionBaseInformationError
        self._output_dimension = new_output_dimension

    @property
    def output_consistency(self):
        """Output consistency; must be in ``metadata/output_consistency.txt``."""
        return self._output_consistency

    @output_consistency.setter
    def output_consistency(self, new_output_consistency):
        self._output_consistency = self._check_choice(
            new_output_consistency,
            "Output Consistency",
            OutputConsistencyBaseInformationError,
        )

    @property
    def interpretation(self):
        """Interpretation text; stored without validation."""
        return self._interpretation

    @interpretation.setter
    def interpretation(self, new_interpretation):
        self._interpretation = new_interpretation

    @property
    def tag(self):
        """Tags (str or list); each must be listed in ``metadata/tag.txt``."""
        return self._tag

    @tag.setter
    def tag(self, new_tag):
        self._tag = self._check_choices(new_tag, "Tag", TagBaseInformationError)

    @property
    def publication(self):
        """Publication URL; must pass ``_is_valid_url``."""
        return self._publication

    @publication.setter
    def publication(self, new_publication):
        if not self._is_valid_url(new_publication):
            raise PublicationBaseInformationError
        self._publication = new_publication

    @property
    def publication_type(self):
        """Publication type; must be in ``metadata/publication_type.txt``."""
        return self._publication_type

    @publication_type.setter
    def publication_type(self, new_publication_type):
        self._publication_type = self._check_choice(
            new_publication_type,
            "Publication Type",
            PublicationTypeBaseInformationError,
        )

    @property
    def publication_year(self):
        """Publication year; an ``int`` between 1900 and the current year."""
        return self._publication_year

    @publication_year.setter
    def publication_year(self, new_publication_year):
        if type(new_publication_year) is not int:
            raise PublicationYearBaseInformationError
        # ``datetime`` is the module here, so the current year comes from
        # ``datetime.date``; the previous ``datetime.today("Y")`` call raised
        # AttributeError for every year >= 1900.
        current_year = datetime.date.today().year
        if new_publication_year < 1900 or new_publication_year > current_year:
            # NOTE(review): the exception type is kept as it was for years
            # below 1900; PublicationYearBaseInformationError looks like the
            # intended one - confirm before changing what callers may catch.
            raise PublicationBaseInformationError
        self._publication_year = new_publication_year

    @property
    def source_code(self):
        """Source code URL; must pass ``_is_valid_url``."""
        return self._source_code

    @source_code.setter
    def source_code(self, new_source_code):
        if not self._is_valid_url(new_source_code):
            raise SourceCodeBaseInformationError
        self._source_code = new_source_code

    @property
    def license(self):
        """License; must be listed in ``metadata/license.txt``."""
        return self._license

    @license.setter
    def license(self, new_license):
        self._license = self._check_choice(
            new_license, "License", LicenseBaseInformationError
        )

    @property
    def date(self):
        """Date; stored without validation."""
        return self._date

    @date.setter
    def date(self, new_date):
        self._date = new_date

    @property
    def contributor(self):
        """Contributor; stored without validation."""
        return self._contributor

    @contributor.setter
    def contributor(self, new_contributor):
        self._contributor = new_contributor

    @property
    def github(self):
        """
        GitHub URL built from the identifier (read-only).

        Raises
        ------
        GithubBaseInformationError
            If the identifier has not been set.
        """
        model_id = self.identifier
        if model_id is None:
            raise GithubBaseInformationError
        self._github = "https://github.com/ersilia-os/{0}".format(model_id)
        return self._github

    @property
    def dockerhub(self):
        """DockerHub URL; must start with ``https://hub.docker.com/r/ersiliaos/``."""
        return self._dockerhub

    @dockerhub.setter
    def dockerhub(self, new_dockerhub_url):
        if not new_dockerhub_url.startswith("https://hub.docker.com/r/ersiliaos/"):
            raise DockerhubBaseInformationError
        self._dockerhub = new_dockerhub_url

    @property
    def docker_architecture(self):
        """Docker architectures (str or iterable) from ``metadata/docker_architecture.txt``."""
        return self._docker_architecture

    @docker_architecture.setter
    def docker_architecture(self, new_docker_architecture):
        self._docker_architecture = self._check_choices(
            new_docker_architecture,
            "Docker Architecture",
            DockerArchitectureBaseInformationError,
            require_list=False,
        )

    @property
    def s3(self):
        """S3 URL; must start with the ``ersilia-models-zipped`` bucket prefix."""
        return self._s3

    @s3.setter
    def s3(self, new_s3_url):
        if not new_s3_url.startswith(
            "https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/"
        ):
            raise S3BaseInformationError
        self._s3 = new_s3_url

    @property
    def both_identifiers(self):
        """
        Tuple ``(identifier, slug)`` (read-only).

        Raises
        ------
        BothIdentifiersBaseInformationError
            If either the identifier or the slug has not been set.
        """
        model_id = self.identifier
        slug = self.slug
        if model_id is None or slug is None:
            raise BothIdentifiersBaseInformationError
        self._both_identifiers = (model_id, slug)
        return self._both_identifiers

    @property
    def memory_gb(self):
        """Memory in GB; must be an ``int``."""
        return self._memory_gb

    @memory_gb.setter
    def memory_gb(self, new_memory_gb):
        # Exact type check on purpose: bool is a subclass of int and is rejected.
        if type(new_memory_gb) is not int:
            raise MemoryGbBaseInformationError
        self._memory_gb = new_memory_gb

    def as_dict(self):
        """
        Export the fields that are set as a dictionary.

        Returns
        -------
        dict
            Display-name keys mapped to the stored values; fields whose value
            is None are left out.
        """
        data = {
            "Identifier": self.identifier,
            "Slug": self.slug,
            "Status": self.status,
            "Title": self.title,
            "Description": self.description,
            "Mode": self.mode,
            "Source": self.source,
            "Source Type": self.source_type,
            "Input": self.input,
            "Input Shape": self.input_shape,
            "Task": self.task,
            "Subtask": self.subtask,
            "Biomedical Area": self.biomedical_area,
            # NOTE(review): lower-case "organism" is kept because consumers may
            # rely on this key; from_dict accepts both spellings.
            "Target organism": self.target_organism,
            "Output": self.output,
            "Output Type": self.output_type,
            "Output Shape": self.output_shape,
            "Output Dimension": self.output_dimension,
            "Output Consistency": self.output_consistency,
            "Interpretation": self.interpretation,
            "Tag": self.tag,
            "Publication": self.publication,
            "Publication Type": self.publication_type,
            "Publication Year": self.publication_year,
            "Source Code": self.source_code,
            "License": self.license,
            "Contributor": self.contributor,
            "DockerHub": self.dockerhub,
            "Docker Architecture": self.docker_architecture,
            "S3": self.s3,
            "Memory Gb": self.memory_gb,
        }
        return {k: v for k, v in data.items() if v is not None}

    def _assign(self, var, key, data):
        """
        Legacy helper kept only so existing callers do not break.

        It rebinds its local ``var`` and therefore has no effect on the
        instance; ``from_dict`` no longer uses it.
        """
        if key in data:
            var = data[key]
        else:
            var = None

    def from_dict(self, data):
        """
        Load the fields from a dictionary.

        Values that are present go through the property setters, so they are
        validated and may raise the corresponding ``*BaseInformationError``.
        Keys that are missing, or whose value is None, reset the field to None.

        Parameters
        ----------
        data : dict
            Dictionary using the same display-name keys as ``as_dict``.
        """
        for attr, key in self._DICT_FIELDS:
            value = data[key] if key in data else None
            if value is None and key == "Target Organism":
                # as_dict() writes this key with a lower-case "organism".
                if "Target organism" in data:
                    value = data["Target organism"]
            if value is None:
                # Bypass the setter: most setters cannot validate None.
                setattr(self, "_" + attr, None)
            else:
                setattr(self, attr, value)
import ErsiliaBase from ...utils.terminal import run_command from ...auth.auth import Auth from ...db.hubdata.interfaces import JsonModelsInterface -import validators - -try: - from validators import ValidationFailure -except ImportError: - from validators import ValidationError as ValidationFailure - -from ...utils.exceptions_utils.card_exceptions import ( - SlugBaseInformationError, - IdentifierBaseInformationError, - StatusBaseInformationError, - TitleBaseInformationError, - DescriptionBaseInformationError, - ModeBaseInformationError, - InputBaseInformationError, - InputShapeBaseInformationError, - OutputBaseInformationError, - OutputTypeBaseInformationError, - OutputShapeBaseInformationError, - TaskBaseInformationError, - TagBaseInformationError, - PublicationBaseInformationError, - SourceCodeBaseInformationError, - LicenseBaseInformationError, - GithubBaseInformationError, - DockerhubBaseInformationError, - DockerArchitectureInformationError, - S3BaseInformationError, - BothIdentifiersBaseInformationError, - MemoryGbBaseInformationError, -) -from ...utils.identifiers.model import ModelIdentifier from ...utils.logging import make_temp_dir try: @@ -54,424 +24,6 @@ from ...utils.paths import get_metadata_from_base_dir -class BaseInformation(ErsiliaBase): - """ - Class to handle the base information of a model card. - - A model card contains metadata about a model, such as its identifier, slug, status, - title, description, mode, input, output, and other relevant information. And This class provides - methods to validate and set various fields of a model card. - - Parameters - ---------- - config_json : dict - Configuration settings in JSON format. 
- """ - - def __init__(self, config_json=None): - ErsiliaBase.__init__( - self, config_json=config_json, credentials_json=None - ) - self._github = None - self._identifier = None - self._slug = None - self._status = None - self._title = None - self._description = None - self._mode = None - self._task = None - self._input = None - self._input_shape = None - self._output = None - self._output_type = None - self._output_shape = None - self._interpretation = None - self._tag = None - self._publication = None - self._source_code = None - self._license = None - self._contributor = None - self._dockerhub = None - self._docker_architecture = None - self._s3 = None - self._memory_gb = None - - def _is_valid_url(self, url_string: str) -> bool: - result = validators.url(url_string) - if isinstance(result, ValidationFailure): - return False - return result - - def _read_default_fields(self, field): - root = os.path.dirname(os.path.abspath(__file__)) - filename = field.lower().replace(" ", "_") - file_path = os.path.join(root, "metadata", filename + ".txt") - with open(file_path, "r") as f: - valid_field = f.read().split("\n") - return valid_field - - @property - def identifier(self): - return self._identifier - - @identifier.setter - def identifier(self, new_identifier): - mi = ModelIdentifier() - if not mi.is_valid(new_identifier): - raise IdentifierBaseInformationError - self._identifier = new_identifier - - @property - def slug(self): - return self._slug - - @slug.setter - def slug(self, new_slug): - if new_slug.lower() != new_slug: - raise SlugBaseInformationError - if len(new_slug) > 60: - raise SlugBaseInformationError - if len(new_slug) < 5: - raise SlugBaseInformationError - self._slug = new_slug - - @property - def status(self): - return self._status - - @status.setter - def status(self, new_status): - if new_status not in self._read_default_fields("Status"): - raise StatusBaseInformationError - self._status = new_status - - @property - def title(self): - return 
self._title - - @title.setter - def title(self, new_title): - if len(new_title) > 300: - raise TitleBaseInformationError - if len(new_title) < 10: - raise TitleBaseInformationError - self._title = new_title - - @property - def description(self): - return self._description - - @description.setter - def description(self, new_description): - if len(new_description) < 200: - raise DescriptionBaseInformationError - if new_description == self._title: - raise DescriptionBaseInformationError - self._description = new_description - - @property - def mode(self): - return self._mode - - @mode.setter - def mode(self, new_mode): - if new_mode not in self._read_default_fields("Mode"): - raise ModeBaseInformationError - self._mode = new_mode - - @property - def input(self): - return self._input - - @input.setter - def input(self, new_input): - if type(new_input) is str: - new_input = [new_input] - if type(new_input) is not list: - raise InputBaseInformationError - for inp in new_input: - if inp not in self._read_default_fields("Input"): - raise InputBaseInformationError - self._input = new_input - - @property - def input_shape(self): - return self._input_shape - - @input_shape.setter - def input_shape(self, new_input_shape): - if new_input_shape not in self._read_default_fields( - "Input Shape" - ): - raise InputShapeBaseInformationError - self._input_shape = new_input_shape - - @property - def task(self): - return self._task - - @task.setter - def task(self, new_task): - if type(new_task) is str: - new_task = [new_task] - if type(new_task) is not list: - raise TaskBaseInformationError - for nt in new_task: - if nt not in self._read_default_fields("Task"): - raise TaskBaseInformationError - self._task = new_task - - @property - def output(self): - return self._output - - @output.setter - def output(self, new_output): - if type(new_output) is str: - new_output = [new_output] - default_output = self._read_default_fields("Output") - for no in new_output: - if no not in 
default_output: - raise OutputBaseInformationError - self._output = new_output - - @property - def output_type(self): - return self._output_type - - @output_type.setter - def output_type(self, new_output_type): - if type(new_output_type) is str: - new_output_type = [new_output_type] - default_output_type = self._read_default_fields("Output Type") - for no in new_output_type: - if no not in default_output_type: - raise OutputTypeBaseInformationError - self._output_type = new_output_type - - @property - def output_shape(self): - return self._output_shape - - @output_shape.setter - def output_shape(self, new_output_shape): - default_output_shape = self._read_default_fields("Output Shape") - if new_output_shape not in default_output_shape: - raise OutputShapeBaseInformationError - self._output_shape = new_output_shape - - @property - def interpretation(self): - return self._interpretation - - @interpretation.setter - def interpretation(self, new_interpretation): - self._interpretation = new_interpretation - - @property - def tag(self): - return self._tag - - @tag.setter - def tag(self, new_tag): - if type(new_tag) is str: - new_tag = [new_tag] - if type(new_tag) is not list: - raise TagBaseInformationError - default_tags = self._read_default_fields("Tag") - for nt in new_tag: - if nt not in default_tags: - raise TagBaseInformationError - self._tag = new_tag - - @property - def publication(self): - return self._publication - - @publication.setter - def publication(self, new_publication): - if not self._is_valid_url(new_publication): - raise PublicationBaseInformationError - self._publication = new_publication - - @property - def source_code(self): - return self._source_code - - @source_code.setter - def source_code(self, new_source_code): - if not self._is_valid_url(new_source_code): - raise SourceCodeBaseInformationError - self._source_code = new_source_code - - @property - def license(self): - return self._license - - @license.setter - def license(self, new_license): 
- if new_license not in self._read_default_fields("License"): - raise LicenseBaseInformationError - self._license = new_license - - @property - def date(self): - return self._date - - @date.setter - def date(self, new_date): - self._date = new_date - - @property - def contributor(self): - return self._contributor - - @contributor.setter - def contributor(self, new_contributor): - self._contributor = new_contributor - - @property - def github(self): - model_id = self.identifier - if model_id is None: - raise GithubBaseInformationError - self._github = "https://github.com/ersilia-os/{0}".format( - model_id - ) - return self._github - - @property - def dockerhub(self): - return self._dockerhub - - @dockerhub.setter - def dockerhub(self, new_dockerhub_url): - if not new_dockerhub_url.startswith( - "https://hub.docker.com/r/ersiliaos/" - ): - raise DockerhubBaseInformationError - self._dockerhub = new_dockerhub_url - - @property - def docker_architecture(self): - return self._docker_architecture - - @docker_architecture.setter - def docker_architecture(self, new_docker_architecture): - if type(new_docker_architecture) is str: - new_docker_architecture = [new_docker_architecture] - for d in new_docker_architecture: - if d not in self._read_default_fields( - "Docker Architecture" - ): - raise DockerArchitectureInformationError - self._docker_architecture = new_docker_architecture - - @property - def s3(self): - return self._s3 - - @s3.setter - def s3(self, new_s3_url): - if not new_s3_url.startswith( - "https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/" - ): - raise S3BaseInformationError - self._s3 = new_s3_url - - @property - def both_identifiers(self): - model_id = self.identifier - slug = self.slug - if model_id is None or slug is None: - raise BothIdentifiersBaseInformationError - self._both_identifiers = (model_id, slug) - return self._both_identifiers - - @property - def memory_gb(self): - return self._memory_gb - - @memory_gb.setter - def 
memory_gb(self, new_memory_gb): - if type(new_memory_gb) != int: - raise MemoryGbBaseInformationError - self._memory_gb = new_memory_gb - - def as_dict(self) -> dict: - """ - Convert the base information to a dictionary. - - Returns - ------- - dict - The base information as a dictionary. - """ - data = { - "Identifier": self.identifier, - "Slug": self.slug, - "Status": self.status, - "Title": self.title, - "Description": self.description, - "Mode": self.mode, - "Input": self.input, - "Input Shape": self.input_shape, - "Task": self.task, - "Output": self.output, - "Output Type": self.output_type, - "Output Shape": self.output_shape, - "Interpretation": self.interpretation, - "Tag": self.tag, - "Publication": self.publication, - "Source Code": self.source_code, - "License": self.license, - "Contributor": self.contributor, - "DockerHub": self.dockerhub, - "Docker Architecture": self.docker_architecture, - "S3": self.s3, - "Memory Gb": self.memory_gb, - } - data = dict((k, v) for k, v in data.items() if v is not None) - return data - - def from_dict(self, data: dict): - """ - Set the base information from a dictionary. - - Parameters - ---------- - data : dict - The dictionary containing the base information. 
- """ - self.identifier = data["Identifier"] - self.slug = data["Slug"] - self.status = data["Status"] - self.title = data["Title"] - self.description = data["Description"] - self.mode = data["Mode"] - self.input = data["Input"] - self.input_shape = data["Input Shape"] - self.task = data["Task"] - self.output = data["Output"] - self.output_type = data["Output Type"] - self.output_shape = data["Output Shape"] - self.interpretation = data["Interpretation"] - self.tag = data["Tag"] - self.publication = data["Publication"] - self.source_code = data["Source Code"] - self.license = data["License"] - if "Contributor" in data: - self.contributor = data["Contributor"] - if "DockerHub" in data: - self.dockerhub = data["DockerHub"] - if "Docker Architecture" in data: - self.docker_architecture = data["Docker Architecture"] - if "S3" in data: - self.s3 = data["S3"] - if "Memory Gb" in data: - self.memory_gb = data["Memory Gb"] - - class RepoMetadataFile(ErsiliaBase): """ Class to handle the metadata file of a model repository. 
@@ -489,19 +41,15 @@ class RepoMetadataFile(ErsiliaBase): def __init__(self, model_id=None, config_json=None): self.model_id = model_id - ErsiliaBase.__init__( - self, config_json=config_json, credentials_json=None - ) + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) def _github_json_url(self, org=None, branch=None): if org is None: org = "ersilia-os" if branch is None: branch = "main" - return ( - "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( - org, self.model_id, branch, METADATA_JSON_FILE - ) + return "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( + org, self.model_id, branch, METADATA_JSON_FILE ) def _github_yaml_url(self, org=None, branch=None): @@ -509,10 +57,8 @@ def _github_yaml_url(self, org=None, branch=None): org = "ersilia-os" if branch is None: branch = "main" - return ( - "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( - org, self.model_id, branch, METADATA_YAML_FILE - ) + return "https://raw.githubusercontent.com/{0}/{1}/{2}/{3}".format( + org, self.model_id, branch, METADATA_YAML_FILE ) def _get_file_content_from_github(self, org, branch): @@ -528,9 +74,7 @@ def _get_file_content_from_github(self, org, branch): else: return json.loads(r.content) - def get_json_or_yaml_file( - self, org: str = None, branch: str = None - ) -> dict: + def get_json_or_yaml_file(self, org: str = None, branch: str = None) -> dict: """ Get the metadata file from GitHub in JSON or YAML format. JSON format typically used for bentoml packed models and YAML format typically used for ersilia pack models. 
@@ -651,9 +195,7 @@ def get(self, model_id: str = None, slug: str = None) -> dict: """ if model_id is not None: dest_dir = self._model_path(model_id=model_id) - self.logger.debug( - "Trying to get metadata from: {0}".format(dest_dir) - ) + self.logger.debug("Trying to get metadata from: {0}".format(dest_dir)) try: data = get_metadata_from_base_dir(dest_dir) except FileNotFoundError: @@ -679,17 +221,17 @@ def __init__(self, config_json): ErsiliaBase.__init__(self, config_json=config_json) def _raw_readme_url(self, model_id): - url = "https://raw.githubusercontent.com/ersilia-os/{0}/master/README.md".format( - model_id + url = ( + "https://raw.githubusercontent.com/ersilia-os/{0}/master/README.md".format( + model_id + ) ) return url def _gh_view(self, model_id): tmp_folder = make_temp_dir(prefix="ersilia-") tmp_file = os.path.join(tmp_folder, "view.md") - cmd = "gh repo view {0}/{1} > {2}".format( - "ersilia-os", model_id, tmp_file - ) + cmd = "gh repo view {0}/{1} > {2}".format("ersilia-os", model_id, tmp_file) run_command(cmd) with open(tmp_file, "r") as f: text = f.read() @@ -821,8 +363,8 @@ class LakeCard(ErsiliaBase): """ Class to handle the lake card of a model. - The lake in ersilia refers to a result storage platform powered by isaura package to - store repeated result as a cache and allows user to reuse them. It uses HDF5 explorer to + The lake in ersilia refers to a result storage platform powered by isaura package to + store repeated result as a cache and allows user to reuse them. It uses HDF5 explorer to explore and retrieve information from HDF5 files. Parameters diff --git a/ersilia/hub/content/catalog.py b/ersilia/hub/content/catalog.py index cd02c76f9..5ed5d0f3f 100644 --- a/ersilia/hub/content/catalog.py +++ b/ersilia/hub/content/catalog.py @@ -40,6 +40,7 @@ class CatalogTable(object): columns : list The columns of the catalog table. 
""" + def __init__(self, data, columns): self.data = data self.columns = columns @@ -303,7 +304,7 @@ def _get_catalog(self, columns: list, model_cards: list): R += [r] R = sorted(R, key=lambda x: x[0]) return CatalogTable(data=R, columns=columns) - + def hub(self): """List models available in Ersilia model hub from the S3 JSON""" ji = JsonModelsInterface() @@ -322,7 +323,7 @@ def local(self) -> CatalogTable: The catalog table containing the models available locally. """ mc = ModelCard() - columns = self.LESS_FIELDS if self.less else self.MORE_FIELDS+["Model Source"] + columns = self.LESS_FIELDS if self.less else self.MORE_FIELDS + ["Model Source"] cards = [] for model_id in os.listdir(self._bundles_dir): if not self._is_eos(model_id): @@ -334,7 +335,6 @@ def local(self) -> CatalogTable: table = self._get_catalog(columns, cards) return table - def bentoml(self) -> CatalogTable: """ List models available as BentoServices. diff --git a/ersilia/hub/content/columns/data_types.txt b/ersilia/hub/content/columns/data_types.txt new file mode 100644 index 000000000..f93069cf5 --- /dev/null +++ b/ersilia/hub/content/columns/data_types.txt @@ -0,0 +1,4 @@ +float +integer +string +other \ No newline at end of file diff --git a/ersilia/hub/content/columns/desired_directions.txt b/ersilia/hub/content/columns/desired_directions.txt new file mode 100644 index 000000000..70af84e5a --- /dev/null +++ b/ersilia/hub/content/columns/desired_directions.txt @@ -0,0 +1,3 @@ +high +intermediate +low \ No newline at end of file diff --git a/ersilia/hub/content/columns_information.py b/ersilia/hub/content/columns_information.py new file mode 100644 index 000000000..f9df6ea57 --- /dev/null +++ b/ersilia/hub/content/columns_information.py @@ -0,0 +1,125 @@ +import os +import csv +import tempfile +from urllib.request import urlopen +from ... 
import ErsiliaBase + + +ROOT = os.path.dirname(os.path.abspath(__file__)) + +EXPECTED_HEADER = ["name", "type", "direction", "description"] + +MIN_DESCRIPTION_LENGTH = 60 + + +class ColumnsInformation(ErsiliaBase): + def __init__(self, model_id, api_name, config_json=None): + self.model_id = model_id + self.api_name = api_name + ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) + self.relative_path = "model/framework/columns/{0}_columns.csv".format( + self.api_name + ) + with open(os.path.join(ROOT, "columns", "data_types.txt"), "r") as f: + self.DATA_TYPES = [] + for l in f: + self.DATA_TYPES += [l.strip()] + with open(os.path.join(ROOT, "columns", "desired_directions.txt"), "r") as f: + self.DESIRED_DIRECTIONS = [] + for l in f: + self.DESIRED_DIRECTIONS += [l.strip()] + + def _get_columns_information_from_file(self, file_name): + if os.path.exists(file_name): + with open(file_name, "r") as f: + names = [] + types = [] + directions = [] + descriptions = [] + reader = csv.reader(f) + header = next(reader) + if header != EXPECTED_HEADER: + raise ValueError( + "Header {0} is not {1}".format(header, EXPECTED_HEADER) + ) + for r in reader: + names += [r[0]] + types += [r[1]] + if r[2] == "": + directions += [None] + else: + directions += [r[2]] + descriptions += [r[3]] + return {"name": names, "type": types, "direction": directions, "description": descriptions} + else: + self.logger.debug( + "Explicit columns data for {0} API does not exist in file {1}".format( + self.api_name, file_name + ) + ) + return None + + def _get_columns_information_from_local(self): + file_name = os.path.join(self._model_path(self.model_id), self.relative_path) + return self._get_columns_information_from_file(file_name) + + def _get_columns_information_from_github(self): + org = "ersilia-os" + branch = "main" + url = "https://raw.githubusercontent.org/{0}/{1}/{2}/{3}".format( + org, self.model_id, branch, self.relative_path + ) + tmp_dir = 
tempfile.mkdtemp(prefix="ersilia-") + file_name = os.path.join(tmp_dir, "columns.csv") + try: + with urlopen(url) as response: + data = response.read() + with open(file_name, 'wb') as f: + f.write(data) + except Exception as e: + self.logger.debug( + "Explicit columns data for {0} API does not exist in GitHub".format( + self.api_name + ) + ) + self.logger.warning(f"Warning: {e}") + return None + + def _validate_columns_data(self, data): + for d in data["name"]: + if d[0].lower() != d[0]: + raise ValueError("Column names must be lowercase") + if not d.replace("_", "").isalnum(): + raise ValueError( + "Column names must be alphanumeric or contain underscores" + ) + for d in data["type"]: + if d not in self.DATA_TYPES: + raise ValueError( + "Type {0} is not an accepted type: {1}".format( + d, self.DATA_TYPES + ) + ) + for d in data["direction"]: + if d not in self.DESIRED_DIRECTIONS: + raise ValueError( + "Direction {0} is not an accepted direction: {1}".format( + d, self.DESIRED_DIRECTIONS + ) + ) + for d in data["description"]: + if len(d) < MIN_DESCRIPTION_LENGTH: + raise ValueError( + "Description is too short. A minimum of {0} characters is expected".format( + MIN_DESCRIPTION_LENGTH + ) + ) + + def load(self): + data = self._get_columns_information_from_local() + if data is None: + data = self._get_columns_information_from_github() + if data is None: + return None + self._validate_columns_data(data) + return data diff --git a/ersilia/hub/content/information.py b/ersilia/hub/content/information.py index 477676c31..f7eb0032d 100644 --- a/ersilia/hub/content/information.py +++ b/ersilia/hub/content/information.py @@ -7,6 +7,7 @@ emoji = None import click +from .columns_information import ColumnsInformation from ... import ErsiliaBase from ...default import ( PACKMODE_FILE, @@ -34,6 +35,7 @@ class Information(ErsiliaBase): config_json : dict, optional Configuration settings in JSON format. 
""" + def __init__(self, model_id, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -105,6 +107,17 @@ def _get_apis_list(self): else: return None + def _get_columns(self): + columns_data = {} + api_names = self._get_apis_list() + for api_name in api_names: + ci = ColumnsInformation( + model_id=self.model_id, api_name=api_name, config_json=self.config_json + ) + data = ci.load() + columns_data[api_name] = data + return columns_data + def get(self) -> dict: """ Get various information about the model. @@ -123,6 +136,7 @@ def get(self) -> dict: "size": self._get_size(), "metadata": self._get_metadata(), "card": self._get_card(), + "columns": self._get_columns(), } return data @@ -141,6 +155,7 @@ class InformationDisplayer(ErsiliaBase): config_json : dict, optional Configuration settings in JSON format. """ + def __init__(self, info_data, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.info_data = info_data diff --git a/ersilia/hub/content/metadata/biomedical_area.txt b/ersilia/hub/content/metadata/biomedical_area.txt new file mode 100644 index 000000000..35eb64407 --- /dev/null +++ b/ersilia/hub/content/metadata/biomedical_area.txt @@ -0,0 +1,3 @@ +ADMET +Malaria +Tuberculosis \ No newline at end of file diff --git a/ersilia/hub/content/metadata/output.txt b/ersilia/hub/content/metadata/output.txt index f6942bc15..900819e39 100644 --- a/ersilia/hub/content/metadata/output.txt +++ b/ersilia/hub/content/metadata/output.txt @@ -1,5 +1,7 @@ -Boolean Compound +Score +Value +Boolean Descriptor Distance Experimental value @@ -7,5 +9,4 @@ Image Other value Probability Protein -Score Text \ No newline at end of file diff --git a/ersilia/hub/content/metadata/output_consistency.txt b/ersilia/hub/content/metadata/output_consistency.txt new file mode 100644 index 000000000..0ea0e297e --- /dev/null +++ b/ersilia/hub/content/metadata/output_consistency.txt @@ 
-0,0 +1,3 @@ +Fixed +Semi-fixed +Variable \ No newline at end of file diff --git a/ersilia/hub/content/metadata/publication_type.txt b/ersilia/hub/content/metadata/publication_type.txt new file mode 100644 index 000000000..332ac613d --- /dev/null +++ b/ersilia/hub/content/metadata/publication_type.txt @@ -0,0 +1,3 @@ +Peer reviewed +Preprint +Other \ No newline at end of file diff --git a/ersilia/hub/content/metadata/source.txt b/ersilia/hub/content/metadata/source.txt new file mode 100644 index 000000000..549f7d3f9 --- /dev/null +++ b/ersilia/hub/content/metadata/source.txt @@ -0,0 +1,2 @@ +Local +Online \ No newline at end of file diff --git a/ersilia/hub/content/metadata/source_type.txt b/ersilia/hub/content/metadata/source_type.txt new file mode 100644 index 000000000..0ff13cb74 --- /dev/null +++ b/ersilia/hub/content/metadata/source_type.txt @@ -0,0 +1,3 @@ +External +Replicated +Internal \ No newline at end of file diff --git a/ersilia/hub/content/metadata/subtask.txt b/ersilia/hub/content/metadata/subtask.txt new file mode 100644 index 000000000..151f266dc --- /dev/null +++ b/ersilia/hub/content/metadata/subtask.txt @@ -0,0 +1,6 @@ +Featurization +Projection +Property calculation or prediction +Activity prediction +Similarity search +Generation \ No newline at end of file diff --git a/ersilia/hub/content/metadata/target_organism.txt b/ersilia/hub/content/metadata/target_organism.txt new file mode 100644 index 000000000..a70a2fe91 --- /dev/null +++ b/ersilia/hub/content/metadata/target_organism.txt @@ -0,0 +1,3 @@ +Human +Plasmodium spp. 
+Mycobacterium tuberculosis \ No newline at end of file diff --git a/ersilia/hub/content/metadata/task.txt b/ersilia/hub/content/metadata/task.txt index 3e4c1bd3e..ac571a366 100644 --- a/ersilia/hub/content/metadata/task.txt +++ b/ersilia/hub/content/metadata/task.txt @@ -1,7 +1,9 @@ +Representation +Annotation +Sampling Classification Regression Generative -Representation Similarity Clustering Dimensionality reduction \ No newline at end of file diff --git a/ersilia/hub/content/search.py b/ersilia/hub/content/search.py index ea7b2f913..aef8ff283 100644 --- a/ersilia/hub/content/search.py +++ b/ersilia/hub/content/search.py @@ -17,6 +17,7 @@ class ModelSearcher(object): catalog : CatalogTable The catalog table containing the models. """ + """This class is used for searching through the catalog table Attributes: diff --git a/ersilia/hub/content/slug.py b/ersilia/hub/content/slug.py index 0c69b00fb..dcb07c9e3 100644 --- a/ersilia/hub/content/slug.py +++ b/ersilia/hub/content/slug.py @@ -16,6 +16,7 @@ class Slug(ErsiliaBase): config_json : dict, optional Configuration settings in JSON format. """ + def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json) self.db = SlugDb(config_json=config_json) diff --git a/ersilia/hub/delete/delete.py b/ersilia/hub/delete/delete.py index 2a933807c..519741259 100644 --- a/ersilia/hub/delete/delete.py +++ b/ersilia/hub/delete/delete.py @@ -16,7 +16,11 @@ from ..bundle.status import ModelStatus from ...default import ISAURA_FILE_TAG, ISAURA_FILE_TAG_LOCAL -from ...utils.session import get_model_session, remove_session_dir, deregister_model_session +from ...utils.session import ( + get_model_session, + remove_session_dir, + deregister_model_session, +) def rmtree(path): @@ -453,7 +457,9 @@ def delete(self, model_id: str): ) ) dm = DockerManager(config_json=self.config_json) - if dm.is_active(): # TODO This is hacky but is needed by ModelPreparer when model is fetched. 
+ if ( + dm.is_active() + ): # TODO This is hacky but is needed by ModelPreparer when model is fetched. dm.delete_images(model_id) @@ -584,10 +590,16 @@ def can_be_deleted(self, model_id: str) -> Tuple[bool, str]: dm = DockerManager(config_json=self.config_json) if needs_delete: if model_source == "DockerHub" and not dm.is_active(): - return False, "Model fetched through Docker but Docker engine is inactive." + return ( + False, + "Model fetched through Docker but Docker engine is inactive.", + ) return True, "Model can be deleted." else: - return False, f"Model {model_id} is not available locally, no delete necessary." + return ( + False, + f"Model {model_id} is not available locally, no delete necessary.", + ) def delete(self, model_id: str): """ diff --git a/ersilia/hub/fetch/actions/get.py b/ersilia/hub/fetch/actions/get.py index 220c9dbab..8c1e2e33d 100644 --- a/ersilia/hub/fetch/actions/get.py +++ b/ersilia/hub/fetch/actions/get.py @@ -27,8 +27,8 @@ class PackCreator(ErsiliaBase): """ - Class to create a pack for the model. The pack.py file loads a model, - packs it into a BentoML Service instance, and saves the service for deployment. + Class to create a pack for the model. The pack.py file loads a model, + packs it into a BentoML Service instance, and saves the service for deployment. Parameters ---------- @@ -37,6 +37,7 @@ class PackCreator(ErsiliaBase): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id: str, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -55,7 +56,7 @@ def run(self): class ServiceCreator(ErsiliaBase): """ - Class to create a service file for the model. The 'service.py' specifically + Class to create a service file for the model. The 'service.py' specifically facilitates the deployment of a custom model as a BENTOML REST API service. 
Parameters @@ -65,6 +66,7 @@ class ServiceCreator(ErsiliaBase): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id: str, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -117,6 +119,7 @@ class DockerfileCreator(ErsiliaBase): commands : list List of commands to be added to the Dockerfile. """ + def __init__(self, model_id: str, config_json: dict, commands: list): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -167,9 +170,9 @@ class TemplatePreparer(BaseAction): """ Class to prepare the template for the model. - In this context, a template refers to a predefined structure or set of files - that are necessary for setting up the model environment. This includes - configuration files, scripts, and other resources required to deploy and + In this context, a template refers to a predefined structure or set of files + that are necessary for setting up the model environment. This includes + configuration files, scripts, and other resources required to deploy and run the model. Parameters @@ -179,6 +182,7 @@ class TemplatePreparer(BaseAction): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id: str, config_json: dict): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None @@ -242,8 +246,14 @@ class ModelRepositoryGetter(BaseAction): repo_path : str Path to the local repository. 
""" + def __init__( - self, model_id: str, config_json: dict, force_from_github: bool, force_from_s3: bool, repo_path: str + self, + model_id: str, + config_json: dict, + force_from_github: bool, + force_from_s3: bool, + repo_path: str, ): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None @@ -373,7 +383,7 @@ def get(self): #  TODO: work outside GIT LFS -class ModelParametersGetter(BaseAction): +class ModelParametersGetter(BaseAction): """ Class to get the model parameters. Getting the checkpoints, weights. @@ -384,6 +394,7 @@ class ModelParametersGetter(BaseAction): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id: str, config_json: dict): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None @@ -435,8 +446,14 @@ class ModelGetter(BaseAction): force_from_s3 : bool Force download from S3. """ + def __init__( - self, model_id: str, repo_path: str, config_json: dict, force_from_github: bool, force_from_s3: bool + self, + model_id: str, + repo_path: str, + config_json: dict, + force_from_github: bool, + force_from_s3: bool, ): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None diff --git a/ersilia/hub/fetch/actions/inform.py b/ersilia/hub/fetch/actions/inform.py index 55d47097b..ca186fbcf 100644 --- a/ersilia/hub/fetch/actions/inform.py +++ b/ersilia/hub/fetch/actions/inform.py @@ -21,6 +21,7 @@ class ModelInformer(BaseAction): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id, config_json): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None diff --git a/ersilia/hub/fetch/actions/lake.py b/ersilia/hub/fetch/actions/lake.py index 442993a18..66a6d2b77 100644 --- a/ersilia/hub/fetch/actions/lake.py +++ b/ersilia/hub/fetch/actions/lake.py @@ -1,6 +1,7 @@ from ....utils.dvc import DVCFetcher from . 
import BaseAction + class LakeGetter(BaseAction): """ Class to fetch data from precalculated data from the DVC repository. @@ -12,6 +13,7 @@ class LakeGetter(BaseAction): config_json : dict Configuration settings for the model. """ + def __init__(self, model_id: str, config_json: dict): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None diff --git a/ersilia/hub/fetch/actions/sniff_bentoml.py b/ersilia/hub/fetch/actions/sniff_bentoml.py index e920b8c60..1c47a5642 100644 --- a/ersilia/hub/fetch/actions/sniff_bentoml.py +++ b/ersilia/hub/fetch/actions/sniff_bentoml.py @@ -113,7 +113,9 @@ class ModelSniffer(BaseAction): """ def __init__(self, model_id: str, config_json: dict): - super().__init__(model_id=model_id, config_json=config_json, credentials_json=None) + super().__init__( + model_id=model_id, config_json=config_json, credentials_json=None + ) self.logger.debug("Initializing model for inferring its structure") self.model = ErsiliaModel( model_id, config_json=config_json, fetch_if_not_available=False diff --git a/ersilia/hub/fetch/actions/sniff_fastapi.py b/ersilia/hub/fetch/actions/sniff_fastapi.py index fcc44bc5f..bd48d154e 100644 --- a/ersilia/hub/fetch/actions/sniff_fastapi.py +++ b/ersilia/hub/fetch/actions/sniff_fastapi.py @@ -35,6 +35,7 @@ class BuiltinExampleReader(ErsiliaBase): output_example() -> list Returns a list of output examples. """ + def __init__(self, model_id: str, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -102,6 +103,7 @@ class ModelSniffer(BaseAction): sniff() Infers the structure of the model. 
""" + def __init__(self, model_id: str, config_json: dict): BaseAction.__init__( self, model_id=model_id, config_json=config_json, credentials_json=None diff --git a/ersilia/hub/fetch/fetch.py b/ersilia/hub/fetch/fetch.py index 6c40c370b..c145ba212 100644 --- a/ersilia/hub/fetch/fetch.py +++ b/ersilia/hub/fetch/fetch.py @@ -69,6 +69,7 @@ class ModelFetcher(ErsiliaBase): fetcher = ModelFetcher(config_json=config) await fetcher.fetch(model_id="eosxxxx") """ + def __init__( self, config_json: dict = None, @@ -291,14 +292,21 @@ async def _fetch(self, model_id: str) -> FetchResult: if do_dockerhub: self.logger.debug("Decided to fetch from DockerHub") if not self.can_use_docker: - return FetchResult(fetch_success=False, reason="Docker is not installed or active on your system.") + return FetchResult( + fetch_success=False, + reason="Docker is not installed or active on your system.", + ) await self._fetch_from_dockerhub(model_id=model_id) - return FetchResult(fetch_success=True, reason="Model fetched successfully") + return FetchResult( + fetch_success=True, reason="Model fetched successfully" + ) do_hosted = self._decide_if_use_hosted(model_id=model_id) if do_hosted: self.logger.debug("Fetching from hosted") self._fetch_from_hosted(model_id=model_id) - return FetchResult(fetch_success=True, reason="Model fetched successfully") + return FetchResult( + fetch_success=True, reason="Model fetched successfully" + ) if self.overwrite is None: self.logger.debug("Overwriting") self.overwrite = True @@ -306,8 +314,13 @@ async def _fetch(self, model_id: str) -> FetchResult: self._fetch_not_from_dockerhub(model_id=model_id) return FetchResult(fetch_success=True, reason="Model fetched successfully") else: - self.logger.info("Model already exists on your system. If you want to fetch it again, please delete it first.") - return FetchResult(fetch_success=False, reason="Model already exists on your system. 
If you want to fetch it again, please delete the existing model first.") + self.logger.info( + "Model already exists on your system. If you want to fetch it again, please delete it first." + ) + return FetchResult( + fetch_success=False, + reason="Model already exists on your system. If you want to fetch it again, please delete the existing model first.", + ) async def fetch(self, model_id: str) -> bool: """ @@ -337,12 +350,16 @@ async def fetch(self, model_id: str) -> bool: except StandardModelExampleError: self.logger.debug("Standard model example failed, deleting artifacts") do_delete = yes_no_input( - "Do you want to delete the model artifacts? [Y/n]", - default_answer="Y") + "Do you want to delete the model artifacts? [Y/n]", + default_answer="Y", + ) if do_delete: md = ModelFullDeleter(overwrite=False) md.delete(model_id) - return FetchResult(fetch_success=False, reason="Could not successfully run a standard example from the model.") + return FetchResult( + fetch_success=False, + reason="Could not successfully run a standard example from the model.", + ) else: self.logger.debug("Writing model source to file") model_source_file = os.path.join( @@ -354,6 +371,8 @@ async def fetch(self, model_id: str) -> bool: self.logger.error(f"Error during folder creation: {error}") with open(model_source_file, "w") as f: f.write(self.model_source) - return FetchResult(fetch_success=True, reason="Model fetched successfully") + return FetchResult( + fetch_success=True, reason="Model fetched successfully" + ) else: return fr diff --git a/ersilia/hub/fetch/fetch_bentoml.py b/ersilia/hub/fetch/fetch_bentoml.py index d49dc614e..822a0338b 100644 --- a/ersilia/hub/fetch/fetch_bentoml.py +++ b/ersilia/hub/fetch/fetch_bentoml.py @@ -55,6 +55,7 @@ class ModelFetcherFromBentoML(ErsiliaBase): fetcher = ModelFetcherFromBentoML(config_json=config) fetcher.fetch(model_id="eosxxxx") """ + def __init__( self, config_json: dict = None, diff --git a/ersilia/hub/fetch/fetch_fastapi.py 
b/ersilia/hub/fetch/fetch_fastapi.py index dba72dfea..1a078d0dd 100644 --- a/ersilia/hub/fetch/fetch_fastapi.py +++ b/ersilia/hub/fetch/fetch_fastapi.py @@ -50,6 +50,7 @@ class ModelFetcherFromFastAPI(ErsiliaBase): fetcher = ModelFetcherFromFastAPI(config_json=config) fetcher.fetch(model_id="eosxxxx") """ + def __init__( self, config_json: dict = None, diff --git a/ersilia/hub/fetch/inner_template/pack.py b/ersilia/hub/fetch/inner_template/pack.py index 0af4b10cb..48cca2022 100644 --- a/ersilia/hub/fetch/inner_template/pack.py +++ b/ersilia/hub/fetch/inner_template/pack.py @@ -3,6 +3,7 @@ from src.service import Service from src.service import CHECKPOINTS_BASEDIR, FRAMEWORK_BASEDIR + def main(): """ Main function to load the model, pack it into a service, and save the service. @@ -17,5 +18,6 @@ def main(): service.pack("model", mdl) service.save() + if __name__ == "__main__": main() diff --git a/ersilia/hub/fetch/inner_template/src/service.py b/ersilia/hub/fetch/inner_template/src/service.py index 51da795aa..79484541c 100644 --- a/ersilia/hub/fetch/inner_template/src/service.py +++ b/ersilia/hub/fetch/inner_template/src/service.py @@ -18,7 +18,7 @@ FRAMEWORK_BASEDIR = "framework" -def load_model(framework_dir: str, checkpoints_dir: str) -> 'Model': +def load_model(framework_dir: str, checkpoints_dir: str) -> "Model": """ Load the model with the given framework and checkpoints directories. @@ -244,7 +244,7 @@ def _copy_framework(self, base_path): def _model_file_path(self, base_path): return os.path.join(base_path, self.name + self._extension) - def pack(self, model: Model) -> 'Artifact': + def pack(self, model: Model) -> "Artifact": """ Pack the model into the artifact. @@ -261,7 +261,7 @@ def pack(self, model: Model) -> 'Artifact': self._model = model return self - def load(self, path: str) -> 'Artifact': + def load(self, path: str) -> "Artifact": """ Load the model from the given path. 
diff --git a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py index e0835f538..8ccdbcde9 100644 --- a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py +++ b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py @@ -256,9 +256,7 @@ async def modify_information(self, model_id: str): return None data["service_class"] = "pulled_docker" - data["size"] = ( - mp._get_size_of_local_docker_image_in_mb() - ) + data["size"] = mp._get_size_of_local_docker_image_in_mb() with open(information_file, "w") as outfile: json.dump(data, outfile, indent=4) diff --git a/ersilia/hub/fetch/pack/bentoml_pack/mode.py b/ersilia/hub/fetch/pack/bentoml_pack/mode.py index b29e99dab..531c13f6a 100644 --- a/ersilia/hub/fetch/pack/bentoml_pack/mode.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/mode.py @@ -38,7 +38,12 @@ def __init__(self, model_id: str, config_json: dict): self.model_id = model_id self.versioner = Versioner(config_json=config_json) - def _correct_protobuf(self, version: dict, dockerfile: DockerfileFile, protobuf_version: str = "3.19.5") -> DockerfileFile: + def _correct_protobuf( + self, + version: dict, + dockerfile: DockerfileFile, + protobuf_version: str = "3.19.5", + ) -> DockerfileFile: if version["version"] == "0.11.0": self.logger.debug( "Custom Ersilia BentoML is used, no need for modifying protobuf version" diff --git a/ersilia/hub/fetch/pack/bentoml_pack/runners.py b/ersilia/hub/fetch/pack/bentoml_pack/runners.py index 8efbc21ae..68b43ae56 100644 --- a/ersilia/hub/fetch/pack/bentoml_pack/runners.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/runners.py @@ -88,7 +88,9 @@ def _run(self): venv = self._setup() pack_snippet = """ python {0} - """.format(self.cfg.HUB.PACK_SCRIPT) + """.format( + self.cfg.HUB.PACK_SCRIPT + ) venv.run_commandlines(environment=DEFAULT_VENV, commandlines=pack_snippet) self._symlinks() @@ -172,7 +174,9 @@ def _run(self): env = self._setup() pack_snippet = """ python {0} - """.format(self.cfg.HUB.PACK_SCRIPT) + """.format( 
+ self.cfg.HUB.PACK_SCRIPT + ) self.logger.debug("Using environment {0}".format(env)) self.logger.debug("Running command: {0}".format(pack_snippet.strip())) self.conda.run_commandlines(environment=env, commandlines=pack_snippet) diff --git a/ersilia/hub/fetch/pack/fastapi_pack/runners.py b/ersilia/hub/fetch/pack/fastapi_pack/runners.py index 98e6d6939..2c83a1609 100644 --- a/ersilia/hub/fetch/pack/fastapi_pack/runners.py +++ b/ersilia/hub/fetch/pack/fastapi_pack/runners.py @@ -31,6 +31,7 @@ class SystemPack(BasePack): packer = SystemPack(model_id="eosxxxx", config_json=config) packer.run() """ + def __init__(self, model_id: str, config_json: dict): BasePack.__init__(self, model_id, config_json) self.logger.debug("Initializing system packer") @@ -73,6 +74,7 @@ class CondaPack(BasePack): packer = CondaPack(model_id="eosxxxx", config_json=config) packer.run() """ + def __init__(self, model_id: str, config_json: dict): BasePack.__init__(self, model_id, config_json) self.conda = SimpleConda() diff --git a/ersilia/hub/fetch/register/register.py b/ersilia/hub/fetch/register/register.py index e70317171..d1887fddb 100644 --- a/ersilia/hub/fetch/register/register.py +++ b/ersilia/hub/fetch/register/register.py @@ -34,6 +34,7 @@ class ModelRegisterer(ErsiliaBase): registerer = ModelRegisterer(model_id="eosxxxx", config_json=config) await registerer.register(is_from_dockerhub=True) """ + def __init__(self, model_id: str, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id @@ -171,7 +172,9 @@ def register_not_from_hosted(self): with open(file_name, "w") as f: json.dump(data, f) - async def register(self, is_from_dockerhub: bool = False, is_from_hosted: bool = False): + async def register( + self, is_from_dockerhub: bool = False, is_from_hosted: bool = False + ): """ Register the model based on its source. 
diff --git a/ersilia/hub/fetch/register/standard_example.py b/ersilia/hub/fetch/register/standard_example.py index 9a63a1ecb..382da2122 100644 --- a/ersilia/hub/fetch/register/standard_example.py +++ b/ersilia/hub/fetch/register/standard_example.py @@ -31,6 +31,7 @@ class ModelStandardExample(ErsiliaBase): example_runner = ModelStandardExample(model_id="model123", config_json=config) example_runner.run() """ + def __init__(self, model_id: str, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id diff --git a/ersilia/hub/pull/pull.py b/ersilia/hub/pull/pull.py index 5d7933796..d275dff8e 100644 --- a/ersilia/hub/pull/pull.py +++ b/ersilia/hub/pull/pull.py @@ -41,6 +41,7 @@ class ModelPuller(ErsiliaBase): puller = ModelPuller(model_id="eosxxxx", config_json=config) await puller.async_pull() """ + def __init__(self, model_id: str, overwrite: bool = None, config_json: dict = None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.simple_docker = SimpleDocker() diff --git a/ersilia/io/annotated.py b/ersilia/io/annotated.py index 588530456..0d4b35e96 100644 --- a/ersilia/io/annotated.py +++ b/ersilia/io/annotated.py @@ -14,6 +14,7 @@ class AnnotatedDataTyper(object): annotated_shape : str The shape of the annotated data (e.g., "Single", "List", "Flexible List"). """ + def __init__(self, data, annotated_type, annotated_shape): self.data = data self.annotated_type = annotated_type diff --git a/ersilia/io/dataframe.py b/ersilia/io/dataframe.py index e3128e51e..667b50c62 100644 --- a/ersilia/io/dataframe.py +++ b/ersilia/io/dataframe.py @@ -19,6 +19,7 @@ class Dataframe(object): features : list, optional List of features. 
""" + def __init__(self, keys=None, inputs=None, texts=None, values=None, features=None): self.keys = keys self.inputs = inputs diff --git a/ersilia/io/input.py b/ersilia/io/input.py index bb07c7b21..b0d73bbec 100644 --- a/ersilia/io/input.py +++ b/ersilia/io/input.py @@ -28,6 +28,7 @@ class BaseIOGetter(ErsiliaBase): config_json : dict, optional Configuration JSON. """ + def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json) self.mc = ModelCard(config_json=config_json) @@ -128,6 +129,7 @@ class _GenericAdapter(object): BaseIO : object Base IO handler object. """ + def __init__(self, BaseIO): self.IO = BaseIO @@ -234,6 +236,7 @@ class GenericInputAdapter(object): config_json : dict, optional Configuration JSON. """ + def __init__( self, model_id=None, input_type=None, input_shape=None, config_json=None ): @@ -300,6 +303,7 @@ class ExampleGenerator(ErsiliaBase): config_json : dict, optional Configuration JSON. """ + def __init__(self, model_id, config_json=None): self.model_id = model_id self.IO = BaseIOGetter(config_json=config_json).get(model_id) @@ -351,7 +355,7 @@ def random_example(self, n_samples, file_name, simple): if file_name is None: data = [v for v in self.IO.example(n_samples)] if simple: - data = [{'input': d["input"]} for d in data] + data = [{"input": d["input"]} for d in data] return data else: extension = file_name.split(".")[-1] @@ -359,7 +363,7 @@ def random_example(self, n_samples, file_name, simple): with open(file_name, "w") as f: data = [v for v in self.IO.example(n_samples)] if simple: - data = [{'input': d["input"]} for d in data] + data = [{"input": d["input"]} for d in data] json.dump(data, f, indent=4) else: delimiter = self._get_delimiter(file_name) @@ -421,7 +425,7 @@ def example(self, n_samples, file_name, simple, try_predefined): if try_predefined is True and file_name is not None: self.logger.debug("Trying with predefined input") predefined_available = self.predefined_example(file_name) - + if 
predefined_available: with open(file_name, "r") as f: return f.read() diff --git a/ersilia/io/output.py b/ersilia/io/output.py index 4e44ae6ec..6780ade78 100644 --- a/ersilia/io/output.py +++ b/ersilia/io/output.py @@ -1,4 +1,3 @@ - import csv import os import json @@ -723,7 +722,9 @@ def merge(self, subfiles: list, output_file: str): fo.write(l) use_header = False - def _adapt_generic(self, result: dict, output: str, model_id: str = None, api_name: str = None) -> dict: + def _adapt_generic( + self, result: dict, output: str, model_id: str = None, api_name: str = None + ) -> dict: """ Adapts the output based on the result and model. @@ -834,7 +835,9 @@ def _adapt_when_fastapi_was_used( pass return result - def adapt(self, result: dict, output: str, model_id: str = None, api_name: str = None) -> dict: + def adapt( + self, result: dict, output: str, model_id: str = None, api_name: str = None + ) -> dict: """ Adapts the output based on the result and model. @@ -1066,4 +1069,4 @@ def stack(self, output: str): if self.is_hdf5: self.stack_hdf5(output) else: - self.stack_text(output) \ No newline at end of file + self.stack_text(output) diff --git a/ersilia/io/output_logger.py b/ersilia/io/output_logger.py index 01dd7eb36..9e343782d 100644 --- a/ersilia/io/output_logger.py +++ b/ersilia/io/output_logger.py @@ -16,6 +16,7 @@ class TabularResultLogger(object): ---------- None """ + def __init__(self): pass diff --git a/ersilia/io/pure.py b/ersilia/io/pure.py index 8f7d71986..fece02d4f 100644 --- a/ersilia/io/pure.py +++ b/ersilia/io/pure.py @@ -26,6 +26,7 @@ class PureDataTyper(ErsiliaBase): >>> data_typer.get_type() {'type': 'numeric_array', 'shape': (3,)} """ + def __init__(self, data: any, model_id: str = None, config_json: str = None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.data = data diff --git a/ersilia/io/readers/file.py b/ersilia/io/readers/file.py index 18315580c..79d8333b0 100644 --- a/ersilia/io/readers/file.py +++ 
b/ersilia/io/readers/file.py @@ -25,6 +25,7 @@ class FileTyper(object): path : str Path to the file. """ + def __init__(self, path): self.path = os.path.join(path) @@ -142,6 +143,7 @@ class BatchCacher(object): """ Class to handle caching of file batches. """ + def __init__(self): self.tmp_folder = make_temp_dir(prefix="ersilia-") @@ -236,6 +238,7 @@ class BaseTabularFile(object): sniff_line_limit : int, optional Line limit for sniffing the file. """ + def __init__( self, path, @@ -612,6 +615,7 @@ class TabularFileShapeStandardizer(BaseTabularFile): tfss = TabularFileShapeStandardizer("data.csv", "standard_data.csv", "single", IOHandler()) tfss.standardize() """ + def __init__(self, src_path, dst_path, input_shape, IO, sniff_line_limit=100): if type(input_shape) is str: self.input_shape = InputShape(input_shape).get() @@ -734,6 +738,7 @@ class StandardTabularFileReader(BatchCacher): path : str Path to the file. """ + def __init__(self, path): BatchCacher.__init__(self) self.path = os.path.abspath(path) @@ -850,6 +855,7 @@ class TabularFileReader(StandardTabularFileReader): sniff_line_limit : int, optional Line limit for sniffing the file. """ + def __init__(self, path, IO, sniff_line_limit=100): self.src_path = os.path.abspath(path) self.tmp_folder = make_temp_dir(prefix="ersilia-") @@ -922,6 +928,7 @@ class BaseJsonFile(object): expected_number : int Expected number of elements. """ + def __init__(self, path, IO, entity_is_list, expected_number): self.logger = logger self.path = os.path.abspath(path) @@ -998,6 +1005,7 @@ class JsonFileShapeStandardizer(BaseJsonFile): IO : object IO handler object. 
""" + def __init__(self, src_path, dst_path, input_shape, IO): self.src_path = os.path.abspath(src_path) self.dst_path = os.path.abspath(dst_path) @@ -1053,6 +1061,7 @@ class StandardJsonFileReader(BatchCacher): >>> sjfr.read() [{'key': 'value'}, {'key': 'value'}] """ + def __init__(self, path): BatchCacher.__init__(self) self.path = os.path.abspath(path) @@ -1123,6 +1132,7 @@ class JsonFileReader(StandardJsonFileReader): IO : object IO handler object. """ + def __init__(self, path, IO): self.src_path = os.path.abspath(path) self.tmp_folder = make_temp_dir(prefix="ersilia-") diff --git a/ersilia/io/readers/pyinput.py b/ersilia/io/readers/pyinput.py index dba39a8ff..c8586cfb4 100644 --- a/ersilia/io/readers/pyinput.py +++ b/ersilia/io/readers/pyinput.py @@ -12,6 +12,7 @@ class PyInputReader(object): IO : object IO handler object. """ + def __init__(self, input, IO): self.IO = IO self.input_shape = IO.input_shape diff --git a/ersilia/io/shape.py b/ersilia/io/shape.py index b7b09da36..46241ba39 100644 --- a/ersilia/io/shape.py +++ b/ersilia/io/shape.py @@ -2,6 +2,7 @@ class InputShapeSingle(object): """ A class representing a single input shape. """ + def __init__(self): self.name = "Single" @@ -10,6 +11,7 @@ class InputShapeList(object): """ A class representing a list input shape. """ + def __init__(self): self.name = "List" @@ -18,6 +20,7 @@ class InputShapePairOfLists(object): """ A class representing a pair of lists input shape. 
""" + def __init__(self): self.name = "Pair of Lists" @@ -39,6 +42,7 @@ class InputShape(object): >>> shape.get().name 'List' """ + def __init__(self, input_shape: str = None): if input_shape is None: self.shape = InputShapeSingle() diff --git a/ersilia/io/types/compound.py b/ersilia/io/types/compound.py index 2d5b1c4a0..e528ffd08 100644 --- a/ersilia/io/types/compound.py +++ b/ersilia/io/types/compound.py @@ -39,6 +39,7 @@ class IO(object): >>> list(io.example(1)) [{'key': '...', 'input': '...', 'text': '...'}] """ + def __init__(self, input_shape): self.logger = logger self.input_shape = input_shape diff --git a/ersilia/io/types/text.py b/ersilia/io/types/text.py index 1f8f30399..314018242 100644 --- a/ersilia/io/types/text.py +++ b/ersilia/io/types/text.py @@ -22,6 +22,7 @@ class IO(object): input_shape : object Input shape specification. """ + def __init__(self, input_shape): self.logger = logger self.input_shape = input_shape diff --git a/ersilia/lake/base.py b/ersilia/lake/base.py index 64eeb0246..577638f66 100644 --- a/ersilia/lake/base.py +++ b/ersilia/lake/base.py @@ -22,6 +22,7 @@ class LakeBase(ErsiliaBase): lake_dir : str or None Absolute path to the lake directory if ISAURA_REPOSITORY_PATH is set, otherwise None. """ + def __init__(self, config_json: dict): ErsiliaBase.__init__(self, config_json=config_json) if ISAURA_REPOSITORY_PATH is not None: diff --git a/ersilia/lake/interface.py b/ersilia/lake/interface.py index 7bba8e546..bd23b6ec6 100644 --- a/ersilia/lake/interface.py +++ b/ersilia/lake/interface.py @@ -38,6 +38,7 @@ class IsauraInterface(LakeBase): is_available : bool True if Hdf5ApiExplorer is available, otherwise False. 
""" + def __init__(self, model_id: str, api_name: str, config_json: dict): LakeBase.__init__(self, config_json=config_json) self.model_id = model_id diff --git a/ersilia/lake/manager.py b/ersilia/lake/manager.py index 250cac0f9..01a40fe92 100644 --- a/ersilia/lake/manager.py +++ b/ersilia/lake/manager.py @@ -30,6 +30,7 @@ class IsauraManager(ErsiliaBase): hdf5 : Hdf5Explorer Instance of Hdf5Explorer for managing HDF5 operations. """ + def __init__(self, model_id: str, config_json: dict, credentials_json: dict): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json diff --git a/ersilia/lake/s3_logger.py b/ersilia/lake/s3_logger.py index 0bc835f7a..f6f5ce615 100644 --- a/ersilia/lake/s3_logger.py +++ b/ersilia/lake/s3_logger.py @@ -27,6 +27,7 @@ class S3Logger(ErsiliaBase): aws_secret_access_key : str or None AWS secret access key. """ + def __init__(self, model_id: str, config_json: dict = None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id diff --git a/ersilia/publish/deploy.py b/ersilia/publish/deploy.py index e2092ae61..f97243b53 100644 --- a/ersilia/publish/deploy.py +++ b/ersilia/publish/deploy.py @@ -17,6 +17,7 @@ class DeployBase(ErsiliaBase): credentials_json : str, optional Path to the credentials JSON file. """ + def __init__(self, config_json=None, credentials_json=None): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -143,6 +144,7 @@ class Local(DeployBase): credentials_json : str, optional Path to the credentials JSON file. """ + def __init__(self, config_json=None, credentials_json=None): DeployBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -169,7 +171,7 @@ def deploy(self, model_id: str): class Heroku(DeployBase): """ - Class for Heroku deployment, in a cloud platform that allows developers to build, run, and operate applications entirely in the cloud. 
+ Class for Heroku deployment, in a cloud platform that allows developers to build, run, and operate applications entirely in the cloud. Parameters ---------- @@ -185,6 +187,7 @@ class Heroku(DeployBase): deployer = Heroku(config_json="path/to/config.json", credentials_json="path/to/credentials.json") deployer.deploy("model_id") """ + def __init__(self, config_json=None, credentials_json=None): DeployBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -220,7 +223,7 @@ def deploy(self, model_id: str): """ Deploy the model to Heroku. - This method handles the entire deployment process to Heroku, including setting up temporary directories, + This method handles the entire deployment process to Heroku, including setting up temporary directories, creating the Heroku app, pushing the Docker container, and releasing the app. Parameters @@ -263,6 +266,7 @@ class Aws(DeployBase): credentials_json : str, optional Path to the credentials JSON file. """ + def __init__(self, config_json=None, credentials_json=None): DeployBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -295,6 +299,7 @@ class GoogleCloud(DeployBase): credentials_json : str, optional Path to the credentials JSON file. """ + def __init__(self, config_json=None, credentials_json=None): DeployBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -327,6 +332,7 @@ class Azure(ErsiliaBase): credentials_json : str, optional Path to the credentials JSON file. """ + def __init__(self, config_json=None, credentials_json=None): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -368,6 +374,7 @@ class Deployer(object): deployer = Deployer(cloud="heroku", config_json="path/to/config.json", credentials_json="path/to/credentials.json") deployer.deploy("model_id") """ + def __init__(self, cloud="heroku", config_json=None, credentials_json=None): """Initialize a cloud deployer. 
For now, only 'heroku' is available.""" self.cloud = cloud diff --git a/ersilia/publish/dockerhub.py b/ersilia/publish/dockerhub.py index 2ac4a0a5f..d3e1a1594 100644 --- a/ersilia/publish/dockerhub.py +++ b/ersilia/publish/dockerhub.py @@ -23,6 +23,7 @@ class DockerHubUploader(ErsiliaBase): uploader.set_credentials(docker_user="username", docker_pwd="password") uploader.upload() """ + def __init__(self, model_id: str, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.model_id = model_id diff --git a/ersilia/publish/inspect.py b/ersilia/publish/inspect.py index 783bb237d..1b8e7e002 100644 --- a/ersilia/publish/inspect.py +++ b/ersilia/publish/inspect.py @@ -8,14 +8,14 @@ from ..hub.fetch.actions.template_resolver import TemplateResolver from ..utils.logging import logger from ..default import ( - INSTALL_YAML_FILE, + INSTALL_YAML_FILE, DOCKERFILE_FILE, PACK_METHOD_FASTAPI, PACK_METHOD_BENTOML, METADATA_JSON_FILE, METADATA_YAML_FILE, RUN_FILE, - PREDEFINED_EXAMPLE_FILES + PREDEFINED_EXAMPLE_FILES, ) Result = namedtuple("Result", ["success", "details"]) @@ -26,6 +26,7 @@ REPO_API_URL = "https://api.github.com/repos/ersilia-os/{model}/contents" USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36" + class ModelInspector: """ Class for inspecting model repositories. 
@@ -47,18 +48,15 @@ class ModelInspector: result = inspector.check_repo_exists() result = inspector.check_complete_metadata() """ + RUN_FILE = f"model/framework/{RUN_FILE}" - + COMMON_FILES = [ RUN_FILE, "README.md", "LICENSE", ] - BENTOML_FOLDERS = [ - "model", - "src", - ".github" - ] + BENTOML_FOLDERS = ["model", "src", ".github"] BENTOML_FILES = [ DOCKERFILE_FILE, METADATA_JSON_FILE, @@ -67,11 +65,8 @@ class ModelInspector: ".gitignore", "input.csv", ] - - ERSILIAPACK_FOLDERS = [ - "model", - ".github" - ] + + ERSILIAPACK_FOLDERS = ["model", ".github"] ERSILIAPACK_FILES = [ INSTALL_YAML_FILE, @@ -83,15 +78,10 @@ class ModelInspector: ".gitattributes", ] - BENTOML_FILES = COMMON_FILES + BENTOML_FILES + BENTOML_FILES = COMMON_FILES + BENTOML_FILES ERSILIAPACK_FILES = COMMON_FILES + ERSILIAPACK_FILES - REQUIRED_FIELDS = [ - "Publication", - "Source Code", - "S3", - "DockerHub" - ] + REQUIRED_FIELDS = ["Publication", "Source Code", "S3", "DockerHub"] def __init__(self, model: str, dir: str, config_json=None): self.model = model @@ -100,7 +90,7 @@ def __init__(self, model: str, dir: str, config_json=None): self.content_url = RAW_CONTENT_URL.format(model=model) self.config_json = config_json self.pack_type = self.get_pack_type() - + def get_pack_type(self): """ Determine the packaging method of the model. @@ -110,17 +100,14 @@ def get_pack_type(self): str The packaging method, either 'bentoml' or 'fastapi'. """ - resolver = TemplateResolver( - model_id=self.model, - repo_path=self.dir - ) + resolver = TemplateResolver(model_id=self.model, repo_path=self.dir) if resolver.is_bentoml(): return PACK_METHOD_BENTOML elif resolver.is_fastapi(): return PACK_METHOD_FASTAPI else: return None - + def check_repo_exists(self): """ Check if the model repository exists. @@ -131,14 +118,8 @@ def check_repo_exists(self): A namedtuple containing the success status and details of the check. """ if self._url_exists(self.repo_url): - return Result( - True, - "Repository exists." 
- ) - return Result( - False, - f"Repository not found at {self.repo_url}." - ) + return Result(True, "Repository exists.") + return Result(False, f"Repository not found at {self.repo_url}.") def check_complete_metadata(self): """ @@ -149,32 +130,25 @@ def check_complete_metadata(self): Result A namedtuple containing the success status and details of the check. """ - url = f"{self.content_url}{METADATA_JSON_FILE}" if self.pack_type == "bentoml" \ + url = ( + f"{self.content_url}{METADATA_JSON_FILE}" + if self.pack_type == "bentoml" else f"{self.content_url}{METADATA_YAML_FILE}" + ) if not self._url_exists(url): - return Result( - False, - f"Metadata file missing at {url}." - ) + return Result(False, f"Metadata file missing at {url}.") metadata = self._fetch_json(url) if metadata is None: - return Result( - False, - "Failed to fetch or parse metadata." - ) + return Result(False, "Failed to fetch or parse metadata.") missing_fields = [ - field - for field - in self.REQUIRED_FIELDS - if field not in metadata + field for field in self.REQUIRED_FIELDS if field not in metadata ] invalid_urls = [ - (field, metadata[field]) - for field in self.REQUIRED_FIELDS - if field in metadata - and not self._url_exists(metadata[field]) + (field, metadata[field]) + for field in self.REQUIRED_FIELDS + if field in metadata and not self._url_exists(metadata[field]) ] details = [] @@ -185,9 +159,7 @@ def check_complete_metadata(self): ) if invalid_urls: details.extend( - f"Invalid URL in '{field}': {url}" - for field, url - in invalid_urls + f"Invalid URL in '{field}': {url}" for field, url in invalid_urls ) try: @@ -197,9 +169,9 @@ def check_complete_metadata(self): if details: return Result(False, " ".join(details)) - + return Result(True, "Metadata is complete.") - + def check_dependencies_are_valid(self): """ Check if the dependencies in the Dockerfile or install.yml are valid. 
@@ -209,23 +181,17 @@ def check_dependencies_are_valid(self): Result A namedtuple containing the success status and details of the check. """ - if self.pack_type not in [ - PACK_METHOD_BENTOML, - PACK_METHOD_FASTAPI - ]: - return Result( - False, - f"Unsupported pack type: {self.pack_type}" - ) + if self.pack_type not in [PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI]: + return Result(False, f"Unsupported pack type: {self.pack_type}") file = ( - DOCKERFILE_FILE - if self.pack_type == PACK_METHOD_BENTOML + DOCKERFILE_FILE + if self.pack_type == PACK_METHOD_BENTOML else INSTALL_YAML_FILE ) method = ( - self._validate_dockerfile - if self.pack_type == PACK_METHOD_BENTOML + self._validate_dockerfile + if self.pack_type == PACK_METHOD_BENTOML else self._validate_yml ) @@ -236,11 +202,11 @@ def check_dependencies_are_valid(self): errors = method(content) if errors: return Result(False, " ".join(errors)) - + return Result(True, f"{file} dependencies are valid.") def _get_file_content(self, file): - if self.dir is not None: + if self.dir is not None: path = os.path.join(self.dir, file) if not os.path.isfile(path): return None, f"{file} not found at {path}" @@ -248,20 +214,14 @@ def _get_file_content(self, file): with open(path, "r") as file: return file.read(), None except Exception as e: - return ( - None, - f"Failed to read {file} content: {str(e)}" - ) - else: + return (None, f"Failed to read {file} content: {str(e)}") + else: url = f"{self.content_url}{file}" if not self._url_exists(url): return None, f"{file} not found at {url}" content = self._fetch_text(url) if content is None: - return ( - None, - f"Failed to fetch {file} content." 
- ) + return (None, f"Failed to fetch {file} content.") return content, None def check_complete_folder_structure(self): @@ -275,14 +235,8 @@ def check_complete_folder_structure(self): """ invalid_items = self.validate_repo_structure() if invalid_items: - return Result( - False, - f"Missing folders: {', '.join(invalid_items)}" - ) - return Result( - True, - "Folder structure is complete." - ) + return Result(False, f"Missing folders: {', '.join(invalid_items)}") + return Result(True, "Folder structure is complete.") def check_computational_performance(self): """ @@ -322,16 +276,10 @@ def check_no_extra_files(self): for root, dirs, files in os.walk(self.dir): relative_path = os.path.relpath(root, self.dir) items_in_dir = [ - os.path.join(relative_path, item) - for item - in files + dirs + os.path.join(relative_path, item) for item in files + dirs ] unexpected_items.extend( - item - for item - in items_in_dir - if item - not in expected_items + item for item in items_in_dir if item not in expected_items ) if unexpected_items: @@ -342,10 +290,7 @@ def check_no_extra_files(self): url = REPO_API_URL.format(model=self.model) if not self._url_exists(url): - return Result( - False, - f"Failed to access repository contents at: {url}" - ) + return Result(False, f"Failed to access repository contents at: {url}") headers = { "Accept": "application/vnd.github.v3+json", } @@ -361,8 +306,7 @@ def check_no_extra_files(self): if unexpected_items: return Result( - False, - f"Unexpected items found: {', '.join(unexpected_items)}" + False, f"Unexpected items found: {', '.join(unexpected_items)}" ) return Result(True, "No extra files found.") @@ -371,7 +315,7 @@ def _url_exists(self, url): try: headers = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "User-Agent":USER_AGENT + "User-Agent": USER_AGENT, } response = requests.head(url, headers=headers) logger.debug(f"URl: {url} | status code: {response.status_code}") @@ -383,10 +327,7 @@ def 
_fetch_json(self, url): try: response = requests.get(url) return response.json() - except ( - requests.RequestException, - ValueError - ): + except (requests.RequestException, ValueError): return None def _fetch_text(self, url): @@ -406,22 +347,22 @@ def _validate_urls(self, metadata, fields): def _validate_repo_structure(self, required_items): missing_items = [] - - if self.dir is not None: + + if self.dir is not None: for item in required_items: item_path = os.path.join(self.dir, item) if not os.path.isfile(item_path): missing_items.append(item) - else: + else: for item in required_items: url = f"{RAW_CONTENT_URL.format(model=self.model)}{item}" response = requests.head(url) if response.status_code != 200: logger.debug(f"URL: {url} | STatus Code: {response.status_code}") missing_items.append(item) - + return missing_items - + def validate_repo_structure(self): logger.debug(f"Pack Type: {self.pack_type}") if self.pack_type == PACK_METHOD_BENTOML: @@ -430,43 +371,41 @@ def validate_repo_structure(self): required_items = self.ERSILIAPACK_FILES else: raise ValueError(f"Unsupported pack type: {self.pack_type}") - + return self._validate_repo_structure(required_items) - + def _validate_dockerfile(self, dockerfile_content): lines, errors = dockerfile_content.splitlines(), [] for line in lines: if line.startswith("RUN pip install"): cmd = line.split("RUN ")[-1] result = subprocess.run( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True + cmd, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, ) if result.returncode != 0: - errors.append( - f"Failed to run {cmd}: {result.stderr.strip()}" - ) + errors.append(f"Failed to run {cmd}: {result.stderr.strip()}") if "WORKDIR /repo" not in dockerfile_content: errors.append("Missing 'WORKDIR /repo'.") if "COPY . /repo" not in dockerfile_content: errors.append("Missing 'COPY . 
/repo'.") return errors - + def _validate_yml(self, yml_content): errors = [] try: yml_data = yaml.safe_load(yml_content) except yaml.YAMLError as e: return [f"YAML parsing error: {str(e)}"] - + python_version = yml_data.get("python") if not python_version: errors.append("Missing Python version in install.yml.") - + commands = yml_data.get("commands", []) for command in commands: if not isinstance(command, list) or command[0] != "pip": @@ -485,23 +424,15 @@ def _run_performance_check(self, n): cmd = ( f"ersilia serve {self.model}&& " f"ersilia example -n {n} -c -f my_input.csv && " - "ersilia run -i my_input.csv && ersilia close" + "ersilia run -i my_input.csv && ersilia close" ) start_time = time.time() process = subprocess.run( - cmd, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True + cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True ) if process.returncode != 0: - return Result( - False, - f"Error serving model: {process.stderr.strip()}" - ) + return Result(False, f"Error serving model: {process.stderr.strip()}") execution_time = time.time() - start_time return Result( - True, - f"{n} predictions executed in {execution_time:.2f} seconds." - ) \ No newline at end of file + True, f"{n} predictions executed in {execution_time:.2f} seconds." + ) diff --git a/ersilia/publish/publish.py b/ersilia/publish/publish.py index 2105e873a..9da654c42 100644 --- a/ersilia/publish/publish.py +++ b/ersilia/publish/publish.py @@ -23,6 +23,7 @@ class ModelPublisher(ErsiliaBase): credentials_json : str Path to the credentials JSON file. 
""" + def __init__(self, model_id, config_json, credentials_json): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json diff --git a/ersilia/publish/rebase.py b/ersilia/publish/rebase.py index a26e40b8a..bab86cae3 100644 --- a/ersilia/publish/rebase.py +++ b/ersilia/publish/rebase.py @@ -51,8 +51,17 @@ class TemplateRebaser(ErsiliaBase): credentials_json : str, optional Path to the credentials JSON file. """ - def __init__(self, model_id: str, template_repo="eos-template", config_json=None, credentials_json=None): - ErsiliaBase.__init__(self, config_json=config_json, credentials_json=credentials_json) + + def __init__( + self, + model_id: str, + template_repo="eos-template", + config_json=None, + credentials_json=None, + ): + ErsiliaBase.__init__( + self, config_json=config_json, credentials_json=credentials_json + ) self.model_id = model_id self.template_repo = template_repo self.root = os.path.abspath(self._tmp_dir) @@ -60,7 +69,9 @@ def __init__(self, model_id: str, template_repo="eos-template", config_json=None self.model_path = os.path.join(self.root, self.model_id) self.template_path = os.path.join(self.root, self.template_repo) self.clean() - self.file_folder_rebaser = _FileFolderRebaser(self.model_path, self.template_path) + self.file_folder_rebaser = _FileFolderRebaser( + self.model_path, self.template_path + ) def clone_template(self): """ diff --git a/ersilia/publish/s3.py b/ersilia/publish/s3.py index 8eae4a50f..708a4db6f 100644 --- a/ersilia/publish/s3.py +++ b/ersilia/publish/s3.py @@ -31,6 +31,7 @@ class S3BucketRepoUploader(ErsiliaBase): uploader.set_credentials(aws_access_key_id="access_key", aws_secret_access_key="secret_key") uploader.upload() """ + def __init__(self, model_id: str, config_json=None): self.model_id = model_id ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) @@ -40,7 +41,7 @@ def __init__(self, model_id: str, config_json=None): self.aws_access_key_id = None 
self.aws_secret_access_key = None self.ignore = ["upload_model_to_s3.py"] - + def _clone(self): self.logger.debug("Cloning model {0} from ersilia-os".format(self.model_id)) run_command( @@ -105,7 +106,7 @@ def _zipdir(self, repo_path, ziph): os.path.join(root, file), os.path.join(repo_path, "..") ), ) - + def _zip_model(self, repo_path): repo_path = os.path.abspath(repo_path) self.zip_model_file = os.path.join(self.tmp_zip_folder, self.model_id + ".zip") diff --git a/ersilia/publish/store.py b/ersilia/publish/store.py index b8ce04d37..a8b373b09 100644 --- a/ersilia/publish/store.py +++ b/ersilia/publish/store.py @@ -25,6 +25,7 @@ class ModelStorager(ErsiliaBase): overwrite : bool, optional Whether to overwrite existing files. Default is True. """ + def __init__(self, config_json=None, credentials_json=None, overwrite=True): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -95,6 +96,7 @@ class ModelRemover(ErsiliaBase): remover = ModelRemover(config_json="path/to/config.json", credentials_json="path/to/credentials.json") remover.remove(model_id="model_id") """ + def __init__(self, config_json=None, credentials_json=None): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json diff --git a/ersilia/publish/test.py b/ersilia/publish/test.py index 401c52524..59c5acbe1 100644 --- a/ersilia/publish/test.py +++ b/ersilia/publish/test.py @@ -21,15 +21,15 @@ from ..utils.terminal import run_command_check_output from ..hub.fetch.actions.template_resolver import TemplateResolver from ..default import ( - INFORMATION_FILE, - INSTALL_YAML_FILE, + INFORMATION_FILE, + INSTALL_YAML_FILE, DOCKERFILE_FILE, PACK_METHOD_FASTAPI, PACK_METHOD_BENTOML, METADATA_JSON_FILE, METADATA_YAML_FILE, RUN_FILE, - PREDEFINED_EXAMPLE_FILES + PREDEFINED_EXAMPLE_FILES, ) MISSING_PACKAGES = False @@ -52,6 +52,7 @@ class Options(Enum): OUTPUT2_CSV = "output2.csv" LEVEL_DEEP = "deep" + class TableType(Enum): MODEL_INFORMATION_CHECKS 
= "Model Information Checks" MODEL_FILE_CHECKS = "Model File Checks" @@ -60,41 +61,39 @@ class TableType(Enum): FINAL_RUN_SUMMARY = "Test Run Summary" INSPECT_SUMMARY = "Inspect Summary" + @dataclass class TableConfig: title: str headers: List[str] + TABLE_CONFIGS = { TableType.MODEL_INFORMATION_CHECKS: TableConfig( - title="Model Information Checks", - headers=["Check", "Status"] + title="Model Information Checks", headers=["Check", "Status"] ), TableType.MODEL_FILE_CHECKS: TableConfig( - title="Model File Checks", - headers=["Check", "Status"] + title="Model File Checks", headers=["Check", "Status"] ), TableType.MODEL_DIRECTORY_SIZES: TableConfig( - title="Model Directory Sizes", - headers=["Dest dir", "Env Dir"] + title="Model Directory Sizes", headers=["Dest dir", "Env Dir"] ), TableType.RUNNER_CHECKUP_STATUS: TableConfig( title="Runner Checkup Status", headers=["Runner", "Status"], ), - TableType.FINAL_RUN_SUMMARY: TableConfig( - title="Test Run Summary", - headers=["Check", "Status"] + TableType.FINAL_RUN_SUMMARY: TableConfig( + title="Test Run Summary", headers=["Check", "Status"] ), - TableType.INSPECT_SUMMARY: TableConfig( - title="Inspect Summary", - headers=["Check", "Status"] + TableType.INSPECT_SUMMARY: TableConfig( + title="Inspect Summary", headers=["Check", "Status"] ), } + class STATUS_CONFIGS(Enum): - PASSED = ("PASSED", "green", "✔") - FAILED = ("FAILED", "red", "✘") + PASSED = ("PASSED", "green", "✔") + FAILED = ("FAILED", "red", "✘") WARNING = ("WARNING", "yellow", "⚠") SUCCESS = ("SUCCESS", "green", "★") NA = ("N/A", "dim", "~") @@ -107,6 +106,7 @@ def __init__(self, label, color, icon): def __str__(self): return f"[{self.color}]{self.icon} {self.label}[/{self.color}]" + # fmt: off class TestResult(Enum): DATE_TIME_RUN = ( @@ -1759,4 +1759,4 @@ def setup(self): self.setup_service.check_conda_env() def run(self, output_file=None): - self.runner.run(output_file) \ No newline at end of file + self.runner.run(output_file) diff --git 
a/ersilia/serve/api.py b/ersilia/serve/api.py index a0503a830..88e9338ca 100644 --- a/ersilia/serve/api.py +++ b/ersilia/serve/api.py @@ -55,7 +55,7 @@ def __init__(self, model_id, url, api_name, save_to_lake, config_json): model_id=model_id, api_name=api_name, config_json=config_json ) self.save_to_lake = save_to_lake - if (url[-1] == "/"): + if url[-1] == "/": self.url = url[:-1] else: self.url = url diff --git a/ersilia/serve/services.py b/ersilia/serve/services.py index 04376332c..acb582e4a 100644 --- a/ersilia/serve/services.py +++ b/ersilia/serve/services.py @@ -106,19 +106,19 @@ def _get_apis_from_fastapi(self): def _get_apis_from_where_available(self): apis_list = self._get_apis_from_apis_list() - if (apis_list is None): + if apis_list is None: pack_method = resolve_pack_method( model_path=self._get_bundle_location(self.model_id) ) - if (pack_method == PACK_METHOD_FASTAPI): + if pack_method == PACK_METHOD_FASTAPI: self.logger.debug("Getting APIs from FastAPI") apis_list = self._get_apis_from_fastapi() - elif (pack_method == PACK_METHOD_BENTOML): + elif pack_method == PACK_METHOD_BENTOML: self.logger.debug("Getting APIs from BentoML") apis_list = self._get_apis_from_bento() else: raise - if (apis_list is None): + if apis_list is None: apis_list = [] for api in apis_list: yield api @@ -1237,7 +1237,7 @@ def _get_apis(self): self.logger.debug("Status code: {0}".format(response.status_code)) if response.status_code == 502: raise BadGatewayError(url) - elif response.status_code == 405: # We try the GET endpoint here + elif response.status_code == 405: # We try the GET endpoint here response = requests.get(url) else: response.raise_for_status() @@ -1514,4 +1514,4 @@ def close(self): """ Close the hosted service. 
""" - pass \ No newline at end of file + pass diff --git a/ersilia/serve/standard_api.py b/ersilia/serve/standard_api.py index 16fe6e96a..24affd609 100644 --- a/ersilia/serve/standard_api.py +++ b/ersilia/serve/standard_api.py @@ -48,6 +48,7 @@ class StandardCSVRunApi(ErsiliaBase): result = api.post(input_data, output_data) print(result) """ + def __init__(self, model_id, url, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self.logger.info( @@ -210,9 +211,10 @@ def get_expected_output_header(self): with open(file, "r") as f: reader = csv.reader(f) header = next(reader) - if ( - header[0:2] != ["key", "input"] - ): # Slicing doesn't raise an error even if the list does not have 2 elements + if header[0:2] != [ + "key", + "input", + ]: # Slicing doesn't raise an error even if the list does not have 2 elements header = ["key", "input"] + header return header except (FileNotFoundError, StopIteration): diff --git a/ersilia/setup/baseconda.py b/ersilia/setup/baseconda.py index a00641fcd..4afdf0804 100644 --- a/ersilia/setup/baseconda.py +++ b/ersilia/setup/baseconda.py @@ -24,6 +24,7 @@ class SetupBaseConda(object): delete(org, tag) Deletes the specified Conda environment. 
""" + def __init__(self, config_json=None): self.conda = SimpleConda() self.versions = Versioner() @@ -160,7 +161,9 @@ def find_closest_python_version(self, python_version: str) -> str: bash_script = """ source {0}/etc/profile.d/conda.sh conda search python > {1} - """.format(self.conda.conda_prefix(is_base), tmp_file) + """.format( + self.conda.conda_prefix(is_base), tmp_file + ) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) @@ -208,12 +211,16 @@ def setup(self, org: str, tag: str) -> None: bash_script = """ source {0}/etc/profile.d/conda.sh conda deactivate - """.format(self.conda.conda_prefix(False)) + """.format( + self.conda.conda_prefix(False) + ) else: bash_script = "" bash_script += """ source {0}/etc/profile.d/conda.sh - """.format(self.conda.conda_prefix(True)) + """.format( + self.conda.conda_prefix(True) + ) python_version = self.find_closest_python_version(ptag["python"]) bash_script += """ cd {0} @@ -221,7 +228,9 @@ def setup(self, org: str, tag: str) -> None: conda activate {1} {3} conda deactivate - """.format(tmp_repo, env, python_version, cmd) + """.format( + tmp_repo, env, python_version, cmd + ) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) diff --git a/ersilia/setup/basedocker.py b/ersilia/setup/basedocker.py index edff2d426..0a615c09b 100644 --- a/ersilia/setup/basedocker.py +++ b/ersilia/setup/basedocker.py @@ -17,6 +17,7 @@ class SetupBaseDocker(ErsiliaBase): config_json : dict, optional Configuration settings in JSON format. """ + def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json) self.docker = SimpleDocker() @@ -77,7 +78,9 @@ def setup(self, org: str, tag: str) -> None: COPY . . RUN pip install . 
- """.format(tag, self.cfg.ENV.DOCKER.IMAGE_WORKDIR) + """.format( + tag, self.cfg.ENV.DOCKER.IMAGE_WORKDIR + ) path = os.path.join(tmp_repo, "Dockerfile") with open(path, "w") as f: lines = dockerfile.split("\n") diff --git a/ersilia/setup/requirements/bentoml.py b/ersilia/setup/requirements/bentoml.py index 99421cd30..d46e55b3a 100644 --- a/ersilia/setup/requirements/bentoml.py +++ b/ersilia/setup/requirements/bentoml.py @@ -18,6 +18,7 @@ class BentoMLRequirement(object): install() Installs the Ersilia version of BentoML. """ + def __init__(self): pass @@ -32,6 +33,7 @@ def is_installed(self) -> bool: """ try: import bentoml + return True except ImportError: return False diff --git a/ersilia/setup/requirements/compound.py b/ersilia/setup/requirements/compound.py index f161c10c4..763c7fcbf 100644 --- a/ersilia/setup/requirements/compound.py +++ b/ersilia/setup/requirements/compound.py @@ -11,6 +11,7 @@ class RdkitRequirement(object): install() Installs the RDKit library. """ + def __init__(self): self.name = "rdkit" try: @@ -40,6 +41,7 @@ class ChemblWebResourceClientRequirement(object): install() Installs the ChEMBL web resource client library. """ + def __init__(self): self.name = "chembl_webresource_client" try: diff --git a/ersilia/setup/requirements/conda.py b/ersilia/setup/requirements/conda.py index 3ffdf4b02..0987a8d43 100644 --- a/ersilia/setup/requirements/conda.py +++ b/ersilia/setup/requirements/conda.py @@ -12,6 +12,7 @@ class CondaRequirement(object): install() Placeholder for installing Conda. """ + def __init__(self): self.name = "conda" diff --git a/ersilia/setup/requirements/docker.py b/ersilia/setup/requirements/docker.py index f099bf2fb..b2adbac69 100644 --- a/ersilia/setup/requirements/docker.py +++ b/ersilia/setup/requirements/docker.py @@ -20,6 +20,7 @@ class DockerRequirement(object): is_installed() Checks if Docker is installed. 
""" + def __init__(self): self.name = "docker" diff --git a/ersilia/setup/requirements/eospath.py b/ersilia/setup/requirements/eospath.py index 5dec844f9..017b95e82 100644 --- a/ersilia/setup/requirements/eospath.py +++ b/ersilia/setup/requirements/eospath.py @@ -13,6 +13,7 @@ class EosHomePathRequirement(object): eos_home_path_exists() Checks if the EOS home path exists. """ + def __init__(self): pass diff --git a/ersilia/setup/requirements/git.py b/ersilia/setup/requirements/git.py index 2cf9e1166..aef467bfa 100644 --- a/ersilia/setup/requirements/git.py +++ b/ersilia/setup/requirements/git.py @@ -17,6 +17,7 @@ class GithubCliRequirement(object): install() Installs the GitHub CLI. """ + def __init__(self): self.name = "gh" @@ -75,6 +76,7 @@ class GitLfsRequirement(object): install() Installs Git LFS. """ + def __init__(self): self.name = "git-lfs" diff --git a/ersilia/setup/requirements/isaura.py b/ersilia/setup/requirements/isaura.py index 8066189c6..954974239 100644 --- a/ersilia/setup/requirements/isaura.py +++ b/ersilia/setup/requirements/isaura.py @@ -11,6 +11,7 @@ class IsauraRequirement(object): install() Installs the Isaura library. """ + def __init__(self): self.name = "isaura" try: diff --git a/ersilia/setup/requirements/ping.py b/ersilia/setup/requirements/ping.py index 8aba7535c..155e5920e 100644 --- a/ersilia/setup/requirements/ping.py +++ b/ersilia/setup/requirements/ping.py @@ -12,6 +12,7 @@ class PingRequirement(object): is_connected() Checks if the system is connected to the internet. """ + def __init__(self): pass diff --git a/ersilia/setup/utils/clone.py b/ersilia/setup/utils/clone.py index b72189253..4720fa788 100644 --- a/ersilia/setup/utils/clone.py +++ b/ersilia/setup/utils/clone.py @@ -16,6 +16,7 @@ class ErsiliaCloner(ErsiliaBase): clone(path, version) Clones the Ersilia repository to the specified path and version. 
""" + def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json) checker = Checker() diff --git a/ersilia/tools/bentoml/configuration/configparser.py b/ersilia/tools/bentoml/configuration/configparser.py index 2199fd7c7..3d85cfc17 100644 --- a/ersilia/tools/bentoml/configuration/configparser.py +++ b/ersilia/tools/bentoml/configuration/configparser.py @@ -42,7 +42,9 @@ def __init__(self, default_config: str, *args, **kwargs): def _env_var_name(section, key): return "BENTOML__{}__{}".format(section.upper(), key.upper()) - def get(self, section: str, key: str = None, **kwargs) -> str: # pylint:disable=arguments-differ + def get( + self, section: str, key: str = None, **kwargs + ) -> str: # pylint:disable=arguments-differ """ A simple hierarchical config access, priority order: 1. environment var diff --git a/ersilia/utils/conda.py b/ersilia/utils/conda.py index 3f59a10a7..0860f5253 100644 --- a/ersilia/utils/conda.py +++ b/ersilia/utils/conda.py @@ -31,6 +31,7 @@ class BaseConda(object): conda_prefix(is_base) Get the conda prefix path. 
""" + def __init__(self): self.SPECS_JSON = SPECS_JSON self.CHECKSUM_FILE = CHECKSUM_FILE @@ -109,6 +110,7 @@ class CondaUtils(BaseConda): conda_utils.create("myenv", "3.8") """ + def __init__(self, config_json=None): BaseConda.__init__(self) self.versions = Versioner(config_json=config_json) @@ -390,7 +392,9 @@ def activate_base(self): snippet = """ source {0}/etc/profile.d/conda.sh conda activate {1} - """.format(self.conda_prefix(False), BASE) + """.format( + self.conda_prefix(False), BASE + ) return snippet @@ -411,6 +415,7 @@ class SimpleConda(CondaUtils): simple_conda.create("myenv", "3.8") """ + def __init__(self, config_json=None): CondaUtils.__init__(self, config_json=config_json) @@ -421,7 +426,9 @@ def _env_list(self): bash_script = """ source {0}/etc/profile.d/conda.sh conda env list > {1} - """.format(self.conda_prefix(self.is_base()), tmp_file) + """.format( + self.conda_prefix(self.is_base()), tmp_file + ) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) @@ -544,7 +551,9 @@ def delete_one(self, environment): bash_script += """ source {0}/etc/profile.d/conda.sh conda env remove --name {1} -y - """.format(self.conda_prefix(True), environment) + """.format( + self.conda_prefix(True), environment + ) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) @@ -595,7 +604,9 @@ def export_env_yml(self, environment, dest): conda activate {1} conda env export --no-builds > {2} conda deactivate - """.format(self.conda_prefix(True), environment, yml_file) + """.format( + self.conda_prefix(True), environment, yml_file + ) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) @@ -626,7 +637,9 @@ def clone(self, src_env, dst_env): bash_script += """ source {0}/etc/profile.d/conda.sh conda create --clone {1} --name {2} -y - """.format(self.conda_prefix(True), src_env, dst_env) + """.format( + self.conda_prefix(True), src_env, dst_env + 
) with open(tmp_script, "w") as f: f.write(bash_script) run_command("bash {0}".format(tmp_script)) @@ -665,7 +678,9 @@ def create_executable_bash_script(self, environment, commandlines, file_name): source {0}/etc/profile.d/conda.sh conda activate {1} {2} - """.format(self.conda_prefix(True), environment, commandlines) + """.format( + self.conda_prefix(True), environment, commandlines + ) with open(file_name, "w") as f: f.write(bash_script) return file_name @@ -718,6 +733,7 @@ class StandaloneConda(object): """ A class to manage standalone conda environments. """ + def __init__(self): pass @@ -761,7 +777,9 @@ def run_commandlines(self, environment, commandlines): bash_script = """ source /{0}/bin/activate {1} - """.format(environment, commandlines) + """.format( + environment, commandlines + ) with open(tmp_script, "w") as f: f.write(bash_script) diff --git a/ersilia/utils/config.py b/ersilia/utils/config.py index b362352b7..f9cf1c577 100644 --- a/ersilia/utils/config.py +++ b/ersilia/utils/config.py @@ -25,6 +25,7 @@ class Checker(object): get_development_path() Get the development path. """ + def __init__(self): self.development_path = None self._config() @@ -166,6 +167,7 @@ class Config(object): json_file : str, optional The path to the JSON configuration file. Default is None. """ + def __init__(self, json_file=None): """Initialize a Config instance. @@ -202,6 +204,7 @@ class Secrets(object): overwrite : bool, optional Whether to overwrite existing files. Default is True. """ + def __init__(self, overwrite=True): self.overwrite = overwrite self.secrets_json = os.path.join(EOS, SECRETS_JSON) @@ -273,6 +276,7 @@ class Credentials(object): json_file : str, optional The path to the JSON credentials file. Default is None. 
""" + def __init__(self, json_file=None): if json_file is None: try: diff --git a/ersilia/utils/csvfile.py b/ersilia/utils/csvfile.py index 8524621bd..eb092e458 100644 --- a/ersilia/utils/csvfile.py +++ b/ersilia/utils/csvfile.py @@ -14,6 +14,7 @@ class CsvDataLoader(object): read(file_path) Read data from a CSV, TSV, or JSON file. """ + def __init__(self): self.values = None self.keys = None diff --git a/ersilia/utils/docker.py b/ersilia/utils/docker.py index 524ebba89..dfd9ce8cf 100644 --- a/ersilia/utils/docker.py +++ b/ersilia/utils/docker.py @@ -72,6 +72,7 @@ class SimpleDocker(object): use_udocker : bool, optional Whether to use udocker instead of Docker. Default is None. """ + def __init__(self, use_udocker=None): self.identifier = LongIdentifier() self.logger = logger @@ -545,6 +546,7 @@ class SimpleDockerfileParser(DockerfileParser): path : str The path to the Dockerfile or the directory containing the Dockerfile. """ + def __init__(self, path): if os.path.isdir(path): path = os.path.join(path, "Dockerfile") @@ -591,6 +593,7 @@ class ContainerMetricsSampler: sampling_interval : float, optional The interval between samples in seconds. Default is 0.01. """ + def __init__(self, model_id, sampling_interval=0.01): self.client = docker.from_env() self.logger = logger diff --git a/ersilia/utils/download.py b/ersilia/utils/download.py index 339039603..53393f25a 100644 --- a/ersilia/utils/download.py +++ b/ersilia/utils/download.py @@ -26,6 +26,7 @@ class PseudoDownloader(object): overwrite : bool Whether to overwrite existing files. """ + def __init__(self, overwrite): self.overwrite = overwrite @@ -65,6 +66,7 @@ class OsfDownloader(object): overwrite : bool Whether to overwrite existing files. """ + def __init__(self, overwrite): self.overwrite = overwrite @@ -101,6 +103,7 @@ class GoogleDriveDownloader(object): """ A class to download files from Google Drive. 
""" + def __init__(self): pass @@ -168,6 +171,7 @@ class GitHubDownloader(object): token : str, optional The GitHub token for authentication. Default is None. """ + def __init__(self, overwrite, token=None): self.logger = logger @@ -204,7 +208,9 @@ def _clone_with_git(self, org, repo, destination): GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/{1}/{2}.git mv {2} {3} rm {0} - """.format(tmp_folder, org, repo, destination) + """.format( + tmp_folder, org, repo, destination + ) run_file = os.path.join( os.path.abspath(make_temp_dir(prefix="ersilia")), "run.sh" ) @@ -429,6 +435,7 @@ class S3Downloader(object): """ A class to download files from an S3 bucket. """ + def __init__(self): pass diff --git a/ersilia/utils/dvc.py b/ersilia/utils/dvc.py index 81ec069a6..bfe202701 100644 --- a/ersilia/utils/dvc.py +++ b/ersilia/utils/dvc.py @@ -32,6 +32,7 @@ class DVCFetcher(object): local_repo_path : str The local repository path. """ + def __init__(self, local_repo_path): self.repo_path = local_repo_path @@ -91,6 +92,7 @@ class DVCBrancher(object): """ A class to manage DVC branches. """ + def __init__(self): pass @@ -106,6 +108,7 @@ class DVCSetup(object): model_id : str The model identifier. 
""" + def __init__(self, local_repo_path, model_id): self.repo_path = local_repo_path self.model_id = model_id diff --git a/ersilia/utils/exceptions_utils/base_information_exceptions.py b/ersilia/utils/exceptions_utils/base_information_exceptions.py new file mode 100644 index 000000000..3c0fe754f --- /dev/null +++ b/ersilia/utils/exceptions_utils/base_information_exceptions.py @@ -0,0 +1,281 @@ +import os + +from .exceptions import ErsiliaError +from ...default import AIRTABLE_MODEL_HUB_VIEW_URL + + +def _read_default_fields(field): + root = os.path.dirname(os.path.abspath(__file__)) + filename = field.lower().replace(" ", "_") + file_path = os.path.join( + root, "..", "..", "hub", "content", "metadata", filename + ".txt" + ) + with open(file_path, "r") as f: + valid_field = f.read().split("\n") + return valid_field + + +class BaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia model information\n" + self.hints = "Please check Ersilia AirTable to make sure you are providing the right information. This is the AirTable link: {0}".format( + AIRTABLE_MODEL_HUB_VIEW_URL + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class IdentifierBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia model identifier" + self.hints = "Ersilia model identifiers are 7 alphanumeric characters. They always start with eos, followed by a digit. The eos identifier coincides with the name of the repository. Check our current AirTable to see correct identifiers: {0}".format( + AIRTABLE_MODEL_HUB_VIEW_URL + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class SlugBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia slug" + self.hints = "Slug must be a 5-60 chars lowercase single-word unique identifier. 
Use '-' for linking words if necessary" + ErsiliaError.__init__(self, self.message, self.hints) + + +class StatusBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia status" + self.hints = "Only one of the following status is allowed: {}".format( + ", ".join(_read_default_fields("Status")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class TitleBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia title" + self.hints = "Title must be a 1 sentence (10 to 300 chars)" + ErsiliaError.__init__(self, self.message, self.hints) + + +class DescriptionBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia description" + self.hints = "Description must be longer than 200 characters and different from the title" + ErsiliaError.__init__(self, self.message, self.hints) + + +class ModeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia mode" + self.hints = "Only one of the following modes is allowed: {}".format( + ", ".join(_read_default_fields("Mode")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class SourceBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong source information" + self.hints = "Only one of the following sources is allowed: {}".format( + ", ".join(_read_default_fields("Source")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class SourceTypeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong source type information" + self.hints = "Only one of the following source types is allowed: {}".format( + ", ".join(_read_default_fields("Source Types")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class TaskBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia model task" + self.hints = ( + "Only these tasks are allowed: {}. 
Tasks must be in list format".format( + ", ".join(_read_default_fields("Task")) + ) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class SubtaskBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia model subtask" + self.hints = "Only these subtasks are allowed: {}. Subtasks must be in list format".format( + ", ".join(_read_default_fields("Subtask")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class BiomedicalAreaBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong biomedical area" + self.hints = "Only these biomedical areas are allowed: {}. Biomedical areas must be in list format".format( + ", ".join(_read_default_fields("Biomedical Area")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class TargetOrganismBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong target organism" + self.hints = "Only these target organisms are allowed: {}. Target organisms must be in list format".format( + ", ".join(_read_default_fields("Target Organism")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class InputBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia input" + self.hints = "Only inputs allowed: {}. 
Input must be in list format".format( + ", ".join(_read_default_fields("Input")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class InputShapeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia input shape" + self.hints = "Only one of the following shapes is allowed: {}".format( + ", ".join(_read_default_fields("Input Shape")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class OutputBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia output" + self.hints = "Only one of the following outputs is allowed: {}".format( + ", ".join(_read_default_fields("Output")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class OutputTypeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia output type" + self.hints = "Only output types allowed: {}. More than one output type can be added in list format".format( + ", ".join(_read_default_fields("Output Type")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class OutputShapeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia output shape" + self.hints = "Only one of the following output shapes is allowed: {}".format( + ", ".join(_read_default_fields("Output Shape")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class OutputDimensionBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong dimension" + self.hints = "Dimension should be at least 1" + ErsiliaError.__init__(self, self.message, self.hints) + + +class OutputConsistencyBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong consistency" + self.hints = ( + "Only one of the following output consistency is allowed: {}".format( + ", ".join(_read_default_fields("Output Consistency")) + ) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class 
TagBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia model tag" + self.hints = "Tags must be in list format and they must be accepted our team. This means that only tags that are already available in Ersilia are allowed. If you want to include a new tag, please open a pull request (PR) on the 'tag.txt' file from the Ersilia repository." + ErsiliaError.__init__(self, self.message, self.hints) + + +class LicenseBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong license" + self.hints = "Listed licenses are: {}. If the model has a license not in this list, please open a PR on the 'license.txt' file in the Ersilia repository".format( + ", ".join(_read_default_fields("License")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class GithubBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia GitHub URL" + self.hints = "The model does not seem to be publicly available in Ersilia's GitHub organization profile (ersilia-os). Make sure that a model identifier has been set." + ErsiliaError.__init__(self, self.message, self.hints) + + +class DockerhubBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia DockerHub URL" + self.hints = "The model does not seem to be publicly available in Ersilia's DockerHub organization profile (ersiliaos). Make sure that a model identifier has been set." + ErsiliaError.__init__(self, self.message, self.hints) + + +class DockerArchitectureBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Docker architecture" + self.hints = "Listed Docker architectures are: {}. 
If you are considering a Docker architecture that is not in this list, please open a PR on the 'docker_architecture.txt' file in the Ersilia repository".format( + ", ".join(_read_default_fields("Docker Architecture")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class S3BaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong Ersilia AWS S3 URL" + self.hints = "The model does not seem to be publicly available in Ersilia's AWS S3 bucket for zipped models. Make sure that a model identifier has been set." + ErsiliaError.__init__(self, self.message, self.hints) + + +class BothIdentifiersBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Both identifiers field error" + self.hints = "Ersilia model identifier and/or slug have not been set yet" + ErsiliaError.__init__(self, self.message, self.hints) + + +class PublicationBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Publication field error" + self.hints = "Publication must be a valid URL" + ErsiliaError.__init__(self, self.message, self.hints) + + +class PublicationTypeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong publication type" + self.hints = "Only one of the following status is allowed: {}".format( + ", ".join(_read_default_fields("Status")) + ) + ErsiliaError.__init__(self, self.message, self.hints) + + +class PublicationYearBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Wrong publication year" + self.hints = "Publication year must be valid" + ErsiliaError.__init__(self, self.message, self.hints) + + +class SourceCodeBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Source Code field error" + self.hints = "Source Code must be a valid URL" + ErsiliaError.__init__(self, self.message, self.hints) + + +class MemoryGbBaseInformationError(ErsiliaError): + def __init__(self): + self.message = "Memory Gb field error" + self.hints = 
"Memory Gb field must be specified as an integer indicating GB of memory limit" + ErsiliaError.__init__(self, self.message, self.hints) diff --git a/ersilia/utils/exceptions_utils/card_exceptions.py b/ersilia/utils/exceptions_utils/card_exceptions.py index e9a990526..2f5343383 100644 --- a/ersilia/utils/exceptions_utils/card_exceptions.py +++ b/ersilia/utils/exceptions_utils/card_exceptions.py @@ -1,18 +1,5 @@ -from .exceptions import ErsiliaError -from ...default import AIRTABLE_MODEL_HUB_VIEW_URL - import os - - -def _read_default_fields(field): - root = os.path.dirname(os.path.abspath(__file__)) - filename = field.lower().replace(" ", "_") - file_path = os.path.join( - root, "..", "..", "hub", "content", "metadata", filename + ".txt" - ) - with open(file_path, "r") as f: - valid_field = f.read().split("\n") - return valid_field +from .exceptions import ErsiliaError class CardErsiliaError(ErsiliaError): @@ -20,189 +7,3 @@ def __init__(self): self.message = "Error occured while running card command" self.hints = "" ErsiliaError.__init__(self, self.message, self.hints) - - -# TODO Unused - remove -class BaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia model information\n" - self.hints = "Please check Ersilia AirTable to make sure you are providing the right information. This is the AirTable link: {0}".format( - AIRTABLE_MODEL_HUB_VIEW_URL - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class IdentifierBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia model identifier" - self.hints = "Ersilia model identifiers are 7 alphanumeric characters. They always start with eos, followed by a digit. The eos identifier coincides with the name of the repository. 
Check our current AirTable to see correct identifiers: {0}".format( - AIRTABLE_MODEL_HUB_VIEW_URL - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class SlugBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia slug" - self.hints = "Slug must be a 5-60 chars lowercase single-word unique identifier. Use '-' for linking words if necessary" - ErsiliaError.__init__(self, self.message, self.hints) - - -class StatusBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia status" - self.hints = "Only one of the following status is allowed: {}".format( - ", ".join(_read_default_fields("Status")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class TitleBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia title" - self.hints = "Title must be a 1 sentence (10 to 300 chars)" - ErsiliaError.__init__(self, self.message, self.hints) - - -class DescriptionBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia description" - self.hints = "Description must be longer than 200 characters and different from the title" - ErsiliaError.__init__(self, self.message, self.hints) - - -class ModeBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia mode" - self.hints = "Only one of the following modes is allowed: {}".format( - ", ".join(_read_default_fields("Mode")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class TaskBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia model task" - self.hints = "Only tasks allowed: {}. Tasks must be in list format".format( - ", ".join(_read_default_fields("Task")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class InputBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia input" - self.hints = "Only inputs allowed: {}. 
Input must be in list format".format( - ", ".join(_read_default_fields("Input")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class InputShapeBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia input shape" - self.hints = "Only one of the following shapes is allowed: {}".format( - ", ".join(_read_default_fields("Input Shape")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class OutputBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia output" - self.hints = "Only one of the following outputs is allowed: {}".format( - ", ".join(_read_default_fields("Output")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class OutputTypeBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia output type" - self.hints = "Only output types allowed: {}. More than one output type can be added in list format".format( - ", ".join(_read_default_fields("Output Type")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class OutputShapeBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia output shape" - self.hints = "Only one of the following output shapes is allowed: {}".format( - ", ".join(_read_default_fields("Output Shape")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class TagBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia model tag" - self.hints = "Tags must be in list format and they must be accepted our team. This means that only tags that are already available in Ersilia are allowed. If you want to include a new tag, please open a pull request (PR) on the 'tag.txt' file from the Ersilia repository." - ErsiliaError.__init__(self, self.message, self.hints) - - -class LicenseBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong license" - self.hints = "Listed licenses are: {}. 
If the model has a license not in this list, please open a PR on the 'license.txt' file in the Ersilia repository".format( - ", ".join(_read_default_fields("License")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class GithubBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia GitHub URL" - self.hints = "The model does not seem to be publicly available in Ersilia's GitHub organization profile (ersilia-os). Make sure that a model identifier has been set." - ErsiliaError.__init__(self, self.message, self.hints) - - -class DockerhubBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia DockerHub URL" - self.hints = "The model does not seem to be publicly available in Ersilia's DockerHub organization profile (ersiliaos). Make sure that a model identifier has been set." - ErsiliaError.__init__(self, self.message, self.hints) - - -class DockerArchitectureInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Docker architecture" - self.hints = "Listed Docker architectures are: {}. If you are considering a Docker architecture that is not in this list, please open a PR on the 'docker_architecture.txt' file in the Ersilia repository".format( - ", ".join(_read_default_fields("Docker Architecture")) - ) - ErsiliaError.__init__(self, self.message, self.hints) - - -class S3BaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Wrong Ersilia AWS S3 URL" - self.hints = "The model does not seem to be publicly available in Ersilia's AWS S3 bucket for zipped models. Make sure that a model identifier has been set." 
- ErsiliaError.__init__(self, self.message, self.hints) - - -class BothIdentifiersBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Both identifiers field error" - self.hints = "Ersilia model identifier and/or slug have not been set yet" - ErsiliaError.__init__(self, self.message, self.hints) - - -class PublicationBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Publication field error" - self.hints = "Publication must be a valid URL" - ErsiliaError.__init__(self, self.message, self.hints) - - -class SourceCodeBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Source Code field error" - self.hints = "Source Code must be a valid URL" - ErsiliaError.__init__(self, self.message, self.hints) - - -class MemoryGbBaseInformationError(ErsiliaError): - def __init__(self): - self.message = "Memory Gb field error" - self.hints = "Memory Gb field must be specified as an integer indicating GB of memory limit" - ErsiliaError.__init__(self, self.message, self.hints) diff --git a/ersilia/utils/exceptions_utils/exceptions.py b/ersilia/utils/exceptions_utils/exceptions.py index 60fe642b0..4d34a4690 100644 --- a/ersilia/utils/exceptions_utils/exceptions.py +++ b/ersilia/utils/exceptions_utils/exceptions.py @@ -28,6 +28,7 @@ class ErsiliaError(Exception): print(e) """ + def __init__( self, message="Ersilia has experienced an error", hints="", config_json=None ): diff --git a/ersilia/utils/hdf5.py b/ersilia/utils/hdf5.py index 8eb141ee7..70d0d5db0 100644 --- a/ersilia/utils/hdf5.py +++ b/ersilia/utils/hdf5.py @@ -17,6 +17,7 @@ class Hdf5Data(object): features : array-like The features associated with the data. """ + def __init__(self, values, keys, inputs, features): self.values = np.array(values, dtype=np.float32) self.keys = np.array(keys, dtype=h5py.string_dtype()) @@ -48,6 +49,7 @@ class Hdf5DataLoader(object): load(h5_file) Load data from an HDF5 file. 
""" + def __init__(self): self.values = None self.keys = None @@ -79,6 +81,7 @@ class Hdf5DataStacker(object): h5_files : list A list of paths to the HDF5 files to stack. """ + def __init__(self, h5_files): self.h5_files = h5_files diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py index cd5ae12d2..1d59a24fd 100644 --- a/ersilia/utils/identifiers/compound.py +++ b/ersilia/utils/identifiers/compound.py @@ -46,6 +46,7 @@ class CompoundIdentifier(object): print(inchikey) """ + def __init__(self, local=True, concurrency_limit=10, cache_maxsize=128): if local: self.Chem = Chem diff --git a/ersilia/utils/identifiers/file.py b/ersilia/utils/identifiers/file.py index 15ab8f136..0fa6e840e 100644 --- a/ersilia/utils/identifiers/file.py +++ b/ersilia/utils/identifiers/file.py @@ -13,6 +13,7 @@ class FileIdentifier(object): chunk_size : int, optional The size of the chunks to read from the file. Default is 10000 bytes. """ + def __init__(self, chunk_size=10000): self.chunk_size = chunk_size diff --git a/ersilia/utils/identifiers/long.py b/ersilia/utils/identifiers/long.py index f1b47d6a8..8da87a4e2 100644 --- a/ersilia/utils/identifiers/long.py +++ b/ersilia/utils/identifiers/long.py @@ -18,6 +18,7 @@ class LongIdentifier(object): encode() Generate a UUID or a random identifier if UUID is not available. """ + def __init__(self): pass diff --git a/ersilia/utils/identifiers/model.py b/ersilia/utils/identifiers/model.py index 52fd1218a..325f7e370 100644 --- a/ersilia/utils/identifiers/model.py +++ b/ersilia/utils/identifiers/model.py @@ -10,6 +10,7 @@ class ModelIdentifier(object): A class to handle model identification generation for new ersilia model and validation. 
""" + def __init__(self): self.letters = string.ascii_lowercase self.numbers = "0123456789" diff --git a/ersilia/utils/identifiers/protein.py b/ersilia/utils/identifiers/protein.py index 0556579e3..84422d27b 100644 --- a/ersilia/utils/identifiers/protein.py +++ b/ersilia/utils/identifiers/protein.py @@ -7,6 +7,7 @@ class ProteinIdentifier(object): """ A class to handle protein identification and sequence retrieval. """ + def __init__(self): self.seguid = seguid self.uniprot = UniProt(verbose=False) diff --git a/ersilia/utils/identifiers/short.py b/ersilia/utils/identifiers/short.py index f784ff064..70c6d1836 100644 --- a/ersilia/utils/identifiers/short.py +++ b/ersilia/utils/identifiers/short.py @@ -18,6 +18,7 @@ class ShortIdentifier(object): """ A class to generate short identifiers. """ + def __init__(self): if Hashids is None: self.hashids = None diff --git a/ersilia/utils/identifiers/text.py b/ersilia/utils/identifiers/text.py index 8d930e302..afd8ffd77 100644 --- a/ersilia/utils/identifiers/text.py +++ b/ersilia/utils/identifiers/text.py @@ -5,9 +5,10 @@ class TextIdentifier(object): """ A class to handle text identification by generating MD5 checksums. - This class provides methods to generate a unique identifier (checksum) for a given text string using the MD5 hashing algorithm. + This class provides methods to generate a unique identifier (checksum) for a given text string using the MD5 hashing algorithm. It also includes a method to perform a basic validation check on the generated checksum. """ + def __init__(self): pass diff --git a/ersilia/utils/installers.py b/ersilia/utils/installers.py index 63ed7add7..a97449f52 100644 --- a/ersilia/utils/installers.py +++ b/ersilia/utils/installers.py @@ -29,6 +29,7 @@ class BaseInstaller(ErsiliaBase): credentials_json : dict, optional Credentials settings in JSON format. Default is None. 
""" + def __init__(self, check_install_log, config_json, credentials_json): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -132,6 +133,7 @@ class Installer(BaseInstaller): credentials_json : dict, optional Credentials settings in JSON format. Default is None. """ + def __init__(self, check_install_log=True, config_json=None, credentials_json=None): BaseInstaller.__init__( self, @@ -233,12 +235,16 @@ def base_conda(self): bash_script = """ source {0}/etc/profile.d/conda.sh conda deactivate - """.format(sc.conda_prefix(False)) + """.format( + sc.conda_prefix(False) + ) else: bash_script = "" bash_script += """ source {0}/etc/profile.d/conda.sh - """.format(sc.conda_prefix(True)) + """.format( + sc.conda_prefix(True) + ) bc = SetupBaseConda() python_version = self.versions.python_version() python_version = bc.find_closest_python_version(python_version) @@ -249,7 +255,9 @@ def base_conda(self): pip install -e . python {3} conda deactivate - """.format(tmp_repo, eos_base_env, python_version, tmp_python_script) + """.format( + tmp_repo, eos_base_env, python_version, tmp_python_script + ) with open(tmp_script, "w") as f: f.write(bash_script) python_script = """ @@ -343,6 +351,7 @@ class Uninstaller(BaseInstaller): credentials_json : dict, optional Credentials settings in JSON format. Default is None. """ + def __init__(self, check_install_log=True, config_json=None, credentials_json=None): BaseInstaller.__init__( self, diff --git a/ersilia/utils/logging.py b/ersilia/utils/logging.py index 5753ae517..bedbccfc0 100644 --- a/ersilia/utils/logging.py +++ b/ersilia/utils/logging.py @@ -57,6 +57,7 @@ class Logger(object): success(text) Log a success message. 
""" + _instance = None def __new__(cls, *args, **kwargs): diff --git a/ersilia/utils/paths.py b/ersilia/utils/paths.py index 5b343db16..d24a88888 100644 --- a/ersilia/utils/paths.py +++ b/ersilia/utils/paths.py @@ -32,6 +32,7 @@ class Paths(object): exists(path) Check if a path exists. """ + def __init__(self): self.essentials = ["setup.py", "README.md", "CODE_OF_CONDUCT.md"] @@ -178,6 +179,7 @@ class Metadata: Contributor : Optional[str], optional The contributor of the model. Default is None. """ + Identifier: str Slug: str Title: str diff --git a/ersilia/utils/remove.py b/ersilia/utils/remove.py index b9f7c8568..1ed41b175 100644 --- a/ersilia/utils/remove.py +++ b/ersilia/utils/remove.py @@ -13,6 +13,7 @@ class OsfRemover(object): password : str The OSF password. """ + def __init__(self, username, password): self.username = username self.password = password diff --git a/ersilia/utils/session.py b/ersilia/utils/session.py index 09b72cba1..421c8bf63 100644 --- a/ersilia/utils/session.py +++ b/ersilia/utils/session.py @@ -3,7 +3,14 @@ import psutil import json -from ..default import SESSIONS_DIR, LOGS_DIR, CONTAINER_LOGS_TMP_DIR, SESSION_JSON, EOS, MODELS_JSON +from ..default import ( + SESSIONS_DIR, + LOGS_DIR, + CONTAINER_LOGS_TMP_DIR, + SESSION_JSON, + EOS, + MODELS_JSON, +) def get_current_pid(): @@ -155,11 +162,11 @@ def register_model_session(model_id, session_dir): The session directory. 
""" file_path = os.path.join(EOS, MODELS_JSON) - + if not os.path.exists(file_path): with open(file_path, "w") as f: json.dump({}, f, indent=4) - + with open(file_path, "r") as f: models = json.load(f) @@ -210,4 +217,4 @@ def deregister_model_session(model_id): if model_id in models: del models[model_id] with open(file_path, "w") as f: - json.dump(models, f, indent=4) \ No newline at end of file + json.dump(models, f, indent=4) diff --git a/ersilia/utils/system.py b/ersilia/utils/system.py index 9b371b760..cb580e585 100644 --- a/ersilia/utils/system.py +++ b/ersilia/utils/system.py @@ -30,6 +30,7 @@ class SystemChecker(object): is_inside_docker() Check if the code is running inside a Docker container. """ + def __init__(self): self.uname = platform.uname() diff --git a/ersilia/utils/uninstall.py b/ersilia/utils/uninstall.py index 81b98736e..52ea7375c 100644 --- a/ersilia/utils/uninstall.py +++ b/ersilia/utils/uninstall.py @@ -17,6 +17,7 @@ class Uninstaller(object): uninstall() Main uninstallation method. """ + def __init__(self): self.docker_cleaner = SimpleDocker() diff --git a/ersilia/utils/upload.py b/ersilia/utils/upload.py index ea101f89c..5c14d8bea 100644 --- a/ersilia/utils/upload.py +++ b/ersilia/utils/upload.py @@ -20,6 +20,7 @@ class OsfUploader(object): push(project_id, filename, destination) Upload a file to OSF. """ + def __init__(self, overwrite, username, password): self.overwrite = overwrite self.username = username diff --git a/ersilia/utils/venv.py b/ersilia/utils/venv.py index 01facd861..5fe8874a5 100644 --- a/ersilia/utils/venv.py +++ b/ersilia/utils/venv.py @@ -32,6 +32,7 @@ class SimpleVenv(ErsiliaBase): run_commandlines(environment, commandlines) Run command lines in a virtual environment. 
""" + def __init__(self, root): ErsiliaBase.__init__(self, config_json=None, credentials_json=None) self.root = os.path.abspath(root) diff --git a/ersilia/utils/versioning.py b/ersilia/utils/versioning.py index 401ce7e2e..f044b5d8a 100644 --- a/ersilia/utils/versioning.py +++ b/ersilia/utils/versioning.py @@ -32,6 +32,7 @@ class Versioner(ErsiliaBase): reformat_py(v) Reformat a Python version string. """ + def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json) diff --git a/ersilia/utils/zip.py b/ersilia/utils/zip.py index dc82b89a5..71082f791 100644 --- a/ersilia/utils/zip.py +++ b/ersilia/utils/zip.py @@ -19,6 +19,7 @@ class Zipper(object): zip(dir_name, file) Zip a directory into a file. """ + def __init__(self, remove): self.remove = remove diff --git a/pyproject.toml b/pyproject.toml index 3c114cd06..668eec33a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ersilia" -version = "0.1.39" +version = "0.1.40" description = "A hub of AI/ML models for open source drug discovery and global health" license = "GPLv3" authors = ["Ersilia Open Source Initiative "] @@ -52,7 +52,7 @@ numpy = "<=1.26.4" aiofiles = "<=24.1.0" aiohttp = ">=3.10.11" nest_asyncio = "<=1.6.0" -isaura = { version = "0.1.39", optional = true } +isaura = { version = "0.1.40", optional = true } pytest = { version = "^7.4.0", optional = true } pytest-asyncio = { version = "<=0.24.0", optional = true } pytest-benchmark = { version = "<=4.0.0", optional = true } diff --git a/test/cli/test_fetch.py b/test/cli/test_fetch.py index 78eaee7c3..aa18f0ed0 100644 --- a/test/cli/test_fetch.py +++ b/test/cli/test_fetch.py @@ -14,7 +14,10 @@ def runner(): @patch("ersilia.core.modelbase.ModelBase") -@patch("ersilia.hub.fetch.fetch.ModelFetcher.fetch", return_value=FetchResult(True, "Model fetched successfully.")) +@patch( + "ersilia.hub.fetch.fetch.ModelFetcher.fetch", + return_value=FetchResult(True, "Model fetched successfully."), +) 
@pytest.mark.parametrize( "slug, model, flags", [ diff --git a/test/playground/commands.py b/test/playground/commands.py index 45af72b89..5570ba6e7 100644 --- a/test/playground/commands.py +++ b/test/playground/commands.py @@ -56,7 +56,13 @@ def execute_command(command, description="", dest_path=None, repo_path=None): success, result, checkups, - ) = time.time(), 0, False, "", [] + ) = ( + time.time(), + 0, + False, + "", + [], + ) proc = psutil.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/test/test_compound_identifier.py b/test/test_compound_identifier.py index 4196c7572..a2b2de356 100644 --- a/test/test_compound_identifier.py +++ b/test/test_compound_identifier.py @@ -201,4 +201,3 @@ async def test_pubchem_smiles_to_inchikey_positive(mock_get, compound_identifier session=None, smiles="CCO" ) assert inchikey == "BSYNRYMUTXBXSQ-UHFFFAOYSA-N" - diff --git a/test/test_url_search.py b/test/test_url_search.py index f8ed17f1d..46d053edc 100644 --- a/test/test_url_search.py +++ b/test/test_url_search.py @@ -5,6 +5,7 @@ MODEL_ID = "eos9ei3" + class OriginalModelFinder: def __init__(self): pass @@ -59,7 +60,9 @@ def original_finder(): @pytest.fixture def actual_url(): data = ji.items() - URL = next((item["Host URL"] for item in data if item["Identifier"] == MODEL_ID), None) + URL = next( + (item["Host URL"] for item in data if item["Identifier"] == MODEL_ID), None + ) return URL From 16ff94ade86e9c08f69c7757b27d875190ea2505 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 12:43:07 +0530 Subject: [PATCH 09/11] Bump actions/upload-artifact from 4.4.3 to 4.5.0 (#1482) * added new categories (#1478) * added new categories * added try-except * bugfix list to dictionary * added output consistency field and blackened code * Update columns_information.py * Bump actions/upload-artifact from 4.4.3 to 4.5.0 Bumps 
[actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.3 to 4.5.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882...6f51ac03b9356f520e9adb1b1b7802705f340c2b) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: Miquel Duran-Frigola Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dhanshree Arora --- .github/workflows/tests_and_cleanup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests_and_cleanup.yml b/.github/workflows/tests_and_cleanup.yml index 3b712d9ab..f33479b1c 100644 --- a/.github/workflows/tests_and_cleanup.yml +++ b/.github/workflows/tests_and_cleanup.yml @@ -228,7 +228,7 @@ jobs: #jupyter nbconvert --to notebook --execute notebooks/test-colab-notebook-python-api.ipynb --output=output-python-api.ipynb --ExecutePreprocessor.timeout=-1 - name: Upload Output of the Google Colab Notebook CLI - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # pin@v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # pin@v4.5.0 with: name: output-cli.ipynb path: notebooks/output-cli.ipynb From bc98caf636748080851f0bee108b9b7035fe4d82 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 12:43:52 +0530 Subject: [PATCH 10/11] Bump actions/checkout from 4.1.7 to 4.2.2 (#1483) * added new categories (#1478) * added new categories * added try-except * bugfix list to dictionary * added output consistency field and blackened code * Update columns_information.py * Bump actions/checkout from 4.1.7 to 4.2.2 Bumps 
[actions/checkout](https://github.com/actions/checkout) from 4.1.7 to 4.2.2. - [Release notes](https://github.com/actions/checkout/releases) - [Commits](https://github.com/actions/checkout/compare/v4.1.7...v4.2.2) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: Miquel Duran-Frigola Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dhanshree Arora --- .github/workflows/tests_and_cleanup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests_and_cleanup.yml b/.github/workflows/tests_and_cleanup.yml index f33479b1c..fb3c06172 100644 --- a/.github/workflows/tests_and_cleanup.yml +++ b/.github/workflows/tests_and_cleanup.yml @@ -129,7 +129,7 @@ jobs: - test_auto_fetcher_decider - test_conventional_run steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v3.5.3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v3.5.3 - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH @@ -165,7 +165,7 @@ jobs: needs: run-pytest runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v3.5.3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v3.5.3 - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH From c41fb346609b70f150f7edbe8f6c6492fbbf49b1 Mon Sep 17 00:00:00 2001 From: Abel Legese <73869888+Abellegese@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:36:17 +0300 Subject: [PATCH 11/11] Integrating ruff precommit (#1473) * added new categories (#1478) * added new categories * added try-except * bugfix list to dictionary * added output consistency field and blackened code * Update columns_information.py * Generalize Standard Run (#1411) * Modify header 
calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Add Ruff as Linter and Formatter * Unnecessary files removed * Unnecessary files removed * Merge sample command with the example command (#1422) * Merge sample command with the example command * Fix example command usage * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Reconfiguring ruff to make it simple * Generalize Standard Run (#1411) * Modify header calculation to choose from predefined 
example output file or standard example output file * Remove the readiness function from SCRA because it is redundant, since those checks are also performed by the amenable function * Remove unused method * Make csv serialization work for any kind of model api response * Remove the standard flag from the CLI since it is now the default run * Update tests * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Unnecessary files removed * Upstream merge * Ruff importing sorting * updating readme for linting guidline * circular import fix * example command fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * few code fix * Code fix * Code fix * Code fix * Code fix * Code fix * Update close.py * Update test.py * Update information.py * Update test.py --------- Co-authored-by: Miquel Duran-Frigola Co-authored-by: Dhanshree Arora --- .github/scripts/airtableops.py | 7 +- .github/scripts/convert_airtable_to_json.py | 5 +- .github/scripts/generate_eos_identifier.py | 4 +- .../place_a_dockerfile_in_current_eos_repo.py | 3 +- .github/scripts/static_version_writer.py | 2 +- .github/scripts/update_metadata.py | 4 +- .../scripts/update_model_request_template.py | 2 - .../update_model_workflows_from_template.py | 2 +- .github/scripts/upload_model_to_s3.py | 1 + .github/scripts/write_secrets.py | 2 +- .github/workflows/pr_check.yml | 28 +- .pre-commit-config.yaml | 7 + README.md | 34 + conftest.py | 9 +- docs/conf.py | 27 +- ersilia/__init__.py | 1 + ersilia/_version.py | 2 +- ersilia/auth/auth.py | 16 +- ersilia/cli/__init__.py | 4 +- ersilia/cli/cmd.py | 53 ++ ersilia/cli/commands/__init__.py | 19 +- ersilia/cli/commands/catalog.py | 4 +- ersilia/cli/commands/close.py | 8 +- ersilia/cli/commands/delete.py | 9 +- ersilia/cli/commands/example.py | 11 +- ersilia/cli/commands/fetch.py | 10 +- ersilia/cli/commands/info.py | 
6 +- ersilia/cli/commands/publish.py | 8 +- ersilia/cli/commands/run.py | 9 +- ersilia/cli/commands/serve.py | 8 +- ersilia/cli/commands/setup.py | 2 +- ersilia/cli/commands/test.py | 3 +- ersilia/cli/commands/uninstall.py | 2 +- ersilia/cli/echo.py | 6 +- ersilia/cli/messages.py | 5 +- ersilia/core/base.py | 5 +- ersilia/core/model.py | 60 +- ersilia/core/modelbase.py | 11 +- ersilia/core/session.py | 6 +- ersilia/core/tracking.py | 42 +- ersilia/db/disk/fetched.py | 4 +- ersilia/db/environments/localdb.py | 10 +- ersilia/db/environments/managers.py | 38 +- ersilia/db/hubdata/localslugs.py | 6 +- ersilia/db/hubdata/sanitize.py | 6 +- ersilia/db/hubdata/tables.py | 15 +- ersilia/default.py | 8 +- ersilia/hub/bundle/bundle.py | 6 +- ersilia/hub/bundle/repo.py | 30 +- ersilia/hub/bundle/status.py | 8 +- ersilia/hub/content/base_information.py | 826 ++++++++++++++++-- ersilia/hub/content/card.py | 43 +- ersilia/hub/content/catalog.py | 39 +- ersilia/hub/content/columns_information.py | 47 +- ersilia/hub/content/information.py | 16 +- ersilia/hub/content/search.py | 8 +- ersilia/hub/content/slug.py | 2 +- ersilia/hub/delete/delete.py | 43 +- ersilia/hub/fetch/__init__.py | 18 +- ersilia/hub/fetch/actions/__init__.py | 15 + ersilia/hub/fetch/actions/check.py | 5 +- ersilia/hub/fetch/actions/content.py | 15 +- ersilia/hub/fetch/actions/get.py | 17 +- ersilia/hub/fetch/actions/inform.py | 9 +- ersilia/hub/fetch/actions/modify.py | 12 +- ersilia/hub/fetch/actions/pack_bentoml.py | 8 +- ersilia/hub/fetch/actions/pack_fastapi.py | 7 +- ersilia/hub/fetch/actions/prepare.py | 7 +- ersilia/hub/fetch/actions/setup.py | 4 +- ersilia/hub/fetch/actions/sniff_bentoml.py | 17 +- ersilia/hub/fetch/actions/sniff_fastapi.py | 9 +- .../hub/fetch/actions/template_resolver.py | 7 +- ersilia/hub/fetch/actions/toolize.py | 7 +- ersilia/hub/fetch/fetch.py | 22 +- ersilia/hub/fetch/fetch_bentoml.py | 30 +- ersilia/hub/fetch/fetch_fastapi.py | 31 +- ersilia/hub/fetch/inner_template/pack.py | 8 
+- .../hub/fetch/inner_template/src/service.py | 16 +- ersilia/hub/fetch/lazy_fetchers/dockerhub.py | 20 +- ersilia/hub/fetch/lazy_fetchers/hosted.py | 12 +- .../hub/fetch/pack/bentoml_pack/__init__.py | 16 +- ersilia/hub/fetch/pack/bentoml_pack/mode.py | 10 +- .../hub/fetch/pack/bentoml_pack/runners.py | 20 +- .../hub/fetch/pack/fastapi_pack/__init__.py | 13 +- ersilia/hub/fetch/pack/fastapi_pack/mode.py | 9 +- .../hub/fetch/pack/fastapi_pack/runners.py | 22 +- ersilia/hub/fetch/register/register.py | 17 +- .../hub/fetch/register/standard_example.py | 17 +- ersilia/hub/pull/pull.py | 27 +- ersilia/io/dataframe.py | 3 +- ersilia/io/input.py | 34 +- ersilia/io/output.py | 23 +- ersilia/io/output_logger.py | 2 +- ersilia/io/pure.py | 3 +- ersilia/io/readers/file.py | 28 +- ersilia/io/readers/pyinput.py | 2 +- ersilia/io/types/compound.py | 13 +- ersilia/io/types/naive.py | 3 + ersilia/io/types/protein.py | 4 +- ersilia/io/types/text.py | 11 +- ersilia/lake/base.py | 3 +- ersilia/lake/interface.py | 3 +- ersilia/lake/manager.py | 3 +- ersilia/lake/s3_logger.py | 3 +- ersilia/publish/deploy.py | 19 +- ersilia/publish/dockerhub.py | 11 +- ersilia/publish/inspect.py | 33 +- ersilia/publish/lake.py | 16 + ersilia/publish/publish.py | 15 +- ersilia/publish/rebase.py | 4 +- ersilia/publish/s3.py | 18 +- ersilia/publish/store.py | 10 +- ersilia/publish/test.py | 518 ++++++----- ersilia/serve/api.py | 96 +- ersilia/serve/autoservice.py | 52 +- .../environment/environment_variables.py | 3 +- ersilia/serve/schema.py | 5 +- ersilia/serve/services.py | 55 +- ersilia/serve/standard_api.py | 107 ++- ersilia/setup/baseconda.py | 7 +- ersilia/setup/basedocker.py | 6 +- ersilia/setup/conda.py | 7 + ersilia/setup/config.py | 9 +- ersilia/setup/profile.py | 7 + ersilia/setup/requirements/bentoml.py | 4 +- ersilia/setup/requirements/compound.py | 1 + ersilia/setup/requirements/docker.py | 2 +- ersilia/setup/requirements/eospath.py | 3 +- ersilia/setup/requirements/git.py | 2 +- 
ersilia/setup/requirements/isaura.py | 1 + ersilia/setup/requirements/ping.py | 3 +- ersilia/setup/utils/clone.py | 4 +- ersilia/store/api.py | 12 +- ersilia/store/utils.py | 40 + ersilia/tools/bentoml/__init__.py | 2 +- .../tools/bentoml/configuration/__init__.py | 7 +- .../bentoml/configuration/configparser.py | 2 +- ersilia/tools/bentoml/exceptions.py | 8 + ersilia/tools/bentoml/types.py | 159 +++- ersilia/tools/bentoml/utils/dataclasses.py | 3 +- ersilia/utils/__init__.py | 2 +- ersilia/utils/conda.py | 22 +- ersilia/utils/config.py | 14 +- ersilia/utils/cron.py | 5 +- ersilia/utils/csvfile.py | 2 +- ersilia/utils/docker.py | 27 +- ersilia/utils/download.py | 105 ++- ersilia/utils/dvc.py | 6 +- ersilia/utils/environment.py | 4 + .../utils/exceptions_utils/api_exceptions.py | 1 + .../base_information_exceptions.py | 4 +- .../utils/exceptions_utils/card_exceptions.py | 2 +- .../exceptions_utils/catalog_exceptions.py | 2 + .../exceptions_utils/clear_exceptions.py | 2 + .../exceptions_utils/close_exceptions.py | 2 + .../exceptions_utils/delete_exceptions.py | 2 + .../exceptions_utils/example_exceptions.py | 2 + ersilia/utils/exceptions_utils/exceptions.py | 38 +- .../exceptions_utils/fetch_exceptions.py | 3 +- .../handle_undecorated_exception.py | 6 +- .../exceptions_utils/hubdata_exceptions.py | 2 + .../utils/exceptions_utils/issue_reporting.py | 3 +- .../utils/exceptions_utils/pull_exceptions.py | 2 + .../exceptions_utils/serve_exceptions.py | 2 + .../exceptions_utils/setup_exceptions.py | 2 + .../utils/exceptions_utils/test_exceptions.py | 2 + .../throw_ersilia_exception.py | 3 +- ersilia/utils/identifiers/arbitrary.py | 17 + ersilia/utils/identifiers/compound.py | 40 +- ersilia/utils/identifiers/long.py | 3 +- ersilia/utils/identifiers/model.py | 3 +- ersilia/utils/identifiers/protein.py | 14 +- ersilia/utils/identifiers/short.py | 4 +- ersilia/utils/identifiers/timestamp.py | 12 + ersilia/utils/import.py | 2 +- ersilia/utils/installers.py | 22 +- 
ersilia/utils/logging.py | 11 +- ersilia/utils/paths.py | 21 +- ersilia/utils/session.py | 9 +- ersilia/utils/supp/conda_env_resolve.py | 2 +- ersilia/utils/system.py | 2 +- ersilia/utils/terminal.py | 14 +- ersilia/utils/tracking.py | 4 +- ersilia/utils/uninstall.py | 2 +- ersilia/utils/venv.py | 10 +- ersilia/utils/versioning.py | 2 +- ersilia/utils/zip.py | 2 +- pyproject.toml | 4 +- ruff.toml | 53 ++ test/cli/test_catalog.py | 4 +- test/cli/test_close.py | 53 +- test/cli/test_delete.py | 82 +- test/cli/test_fetch.py | 4 +- test/cli/test_run.py | 12 +- test/cli/test_serve.py | 22 +- test/models/eos0t01/pack.py | 1 - test/models/eos0t01/src/service.py | 3 +- test/models/eos0t02/pack.py | 1 - test/models/eos0t02/src/service.py | 4 +- test/models/eos0t03/pack.py | 1 - test/models/eos0t03/src/service.py | 7 +- .../eos0t04/model/framework/code/main.py | 2 +- test/models/eos0t04/pack.py | 5 +- test/models/eos0t04/src/service.py | 16 +- test/playground/commands.py | 10 +- test/playground/noxfile.py | 12 +- test/playground/rules.py | 2 +- test/playground/runner.py | 3 +- test/test_catalog.py | 2 + test/test_content.py | 2 +- test/test_inputs.py | 5 +- test/test_models.py | 8 +- test/test_url_search.py | 1 + 213 files changed, 3093 insertions(+), 1127 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 ruff.toml diff --git a/.github/scripts/airtableops.py b/.github/scripts/airtableops.py index 3598715ac..afe5d13a9 100644 --- a/.github/scripts/airtableops.py +++ b/.github/scripts/airtableops.py @@ -1,13 +1,12 @@ import os -import requests import pyairtable +import requests import yaml -from ersilia.hub.content.card import BaseInformation -from ersilia.hub.content.card import RepoMetadataFile -from ersilia.utils.terminal import run_command +from ersilia.hub.content.card import BaseInformation, RepoMetadataFile from ersilia.utils.logging import make_temp_dir +from ersilia.utils.terminal import run_command GITHUB_ORG = "ersilia-os" 
AIRTABLE_MODEL_HUB_BASE_ID = "appgxpCzCDNyGjWc8" diff --git a/.github/scripts/convert_airtable_to_json.py b/.github/scripts/convert_airtable_to_json.py index 539396fe1..462de8cf9 100644 --- a/.github/scripts/convert_airtable_to_json.py +++ b/.github/scripts/convert_airtable_to_json.py @@ -1,9 +1,10 @@ -import os import json import logging +import os + import boto3 -from botocore.exceptions import ClientError, NoCredentialsError import requests +from botocore.exceptions import ClientError, NoCredentialsError AIRTABLE_MODEL_HUB_BASE_ID = "appgxpCzCDNyGjWc8" AIRTABLE_TABLE_ID = "tblZGe2a2XeBxrEHP" diff --git a/.github/scripts/generate_eos_identifier.py b/.github/scripts/generate_eos_identifier.py index 49f1a9730..c7a895d80 100644 --- a/.github/scripts/generate_eos_identifier.py +++ b/.github/scripts/generate_eos_identifier.py @@ -1,8 +1,8 @@ +import json +import os import random import string -import json import subprocess -import os def run_command_check_output(cmd): diff --git a/.github/scripts/place_a_dockerfile_in_current_eos_repo.py b/.github/scripts/place_a_dockerfile_in_current_eos_repo.py index 163f045e3..99a126703 100644 --- a/.github/scripts/place_a_dockerfile_in_current_eos_repo.py +++ b/.github/scripts/place_a_dockerfile_in_current_eos_repo.py @@ -1,7 +1,8 @@ import os +import shutil import sys + import requests -import shutil BUILD_VERSIONS = ["ersiliapack", "legacy-bentoml", "multistage-condapack"] ENV_TYPES = ["conda", "pip"] diff --git a/.github/scripts/static_version_writer.py b/.github/scripts/static_version_writer.py index 7f2cd80f1..4ad358076 100644 --- a/.github/scripts/static_version_writer.py +++ b/.github/scripts/static_version_writer.py @@ -12,7 +12,7 @@ def wrapper(package_path): with open(toml_path, "r") as f: toml_content = f.read() toml_content = re.sub( - 'version\s=\s"[0-9\.]+"', f'version = "{version}"', toml_content + r'version\s=\s"[0-9\.]+"', f'version = "{version}"', toml_content ) with open(toml_path, "w") as f: 
f.write(toml_content) diff --git a/.github/scripts/update_metadata.py b/.github/scripts/update_metadata.py index b15cb081d..e27508672 100644 --- a/.github/scripts/update_metadata.py +++ b/.github/scripts/update_metadata.py @@ -1,9 +1,9 @@ import json -import yaml import logging import os import time +import yaml from github import Github, GithubException @@ -43,7 +43,7 @@ def load_json_input(self): This is the JSON data parsed from the new model submission request (GitHub issue) :return: dict """ - self.log.info(f"loading JSON input from env vars") + self.log.info("loading JSON input from env vars") # Load the JSON input from the env vars and convert it to a dict return json.loads(os.environ.get("JSON")) diff --git a/.github/scripts/update_model_request_template.py b/.github/scripts/update_model_request_template.py index 3b2fb633a..b64c3225c 100644 --- a/.github/scripts/update_model_request_template.py +++ b/.github/scripts/update_model_request_template.py @@ -1,5 +1,3 @@ -import os - # Paths to the tag files and the model_request template tag_file = "ersilia/hub/content/metadata/tag.txt" model_request_file = ".github/ISSUE_TEMPLATE/model_request.yml" diff --git a/.github/scripts/update_model_workflows_from_template.py b/.github/scripts/update_model_workflows_from_template.py index 29606ec54..082018f64 100644 --- a/.github/scripts/update_model_workflows_from_template.py +++ b/.github/scripts/update_model_workflows_from_template.py @@ -1,7 +1,7 @@ -import sys import os import shutil import subprocess +import sys model_repo = sys.argv[1] diff --git a/.github/scripts/upload_model_to_s3.py b/.github/scripts/upload_model_to_s3.py index 61551a6bb..3485178e1 100644 --- a/.github/scripts/upload_model_to_s3.py +++ b/.github/scripts/upload_model_to_s3.py @@ -1,4 +1,5 @@ import sys + from ersilia.publish.s3 import S3BucketRepoUploader model_id = sys.argv[1] diff --git a/.github/scripts/write_secrets.py b/.github/scripts/write_secrets.py index 05af4a445..129ddbb5d 100644 --- 
a/.github/scripts/write_secrets.py +++ b/.github/scripts/write_secrets.py @@ -1,6 +1,6 @@ +import collections import json import os -import collections OUTPUT = "secrets.json" diff --git a/.github/workflows/pr_check.yml b/.github/workflows/pr_check.yml index ae1f490b8..e1f579af8 100644 --- a/.github/workflows/pr_check.yml +++ b/.github/workflows/pr_check.yml @@ -5,7 +5,6 @@ on: branches: [ master ] push: branches: [ master ] - workflow_dispatch: permissions: @@ -14,22 +13,18 @@ permissions: jobs: build: runs-on: ubuntu-latest + defaults: + run: + shell: bash -el {0} steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v3.5.3 - - - name: Add conda to system path - run: echo $CONDA/bin >> $GITHUB_PATH - - - name: Set Python to 3.10.10 - run: - conda install -y python=3.10.10 - - - name: Source conda - run: source $CONDA/etc/profile.d/conda.sh + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + python-version: "3.10.10" - name: Install dependencies run: | - source activate conda init conda install git-lfs -c conda-forge git-lfs install @@ -37,19 +32,20 @@ jobs: - name: Install Ersilia run: | - source activate python --version echo "After conda init" conda init python -m pip install -e .[test] + - name: Run Ruff + run: ruff check --output-format=github . + - name: Hub catalog run: | ersilia catalog --hub - name: Fetch model from GitHub run: | - source activate ersilia -v fetch molecular-weight --from_github echo "Serving molecular-weight model." ersilia serve molecular-weight --track @@ -59,7 +55,6 @@ jobs: - name: Fetch model from S3 run: | - source activate ersilia -v fetch molecular-weight --from_s3 echo "Serving molecular-weight model." ersilia serve molecular-weight --track @@ -69,7 +64,6 @@ jobs: - name: Fetch model from DockerHub run: | - source activate ersilia -v fetch molecular-weight --from_dockerhub echo "Serving molecular-weight model." 
--track ersilia serve molecular-weight @@ -79,4 +73,4 @@ jobs: - name: Local catalog run: | - ersilia catalog --local + ersilia catalog --local \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..414c94035 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.4 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/README.md b/README.md index 1c1ae9d5d..ea30aacaf 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,40 @@ The Ersilia Model Hub is a Free, Open Source Software and we highly value new co The Ersilia Open Source Initiative adheres to the [Contributor Covenant](https://ersilia.gitbook.io/ersilia-wiki/code-of-conduct) code of conduct. +### Development Guidelines + +To maintain consistency and code quality, we follow certain coding and linting standards. Please adhere to these guidelines when contributing: + +#### Pre-commit Hooks + +We use `pre-commit` and `ruff` to automate code quality checks. Ensure you install and set up `pre-commit` and `ruff` before committing any changes: + +1. Install pre-commit: `pip install pre-commit` +2. Set up pre-commit hooks in your local repository by running: + ```bash + pre-commit install + ``` +3. When you commit, the hooks automatically fix issues where possible, but the commit will fail on critical errors such as a missing docstring on a public class or public method. + +#### Manual with Ruff + +1. Run `ruff` to check for linting errors: + ```bash + ruff check . + ``` +2. Automatically fix linting issues (where possible): + ```bash + ruff check . --fix + ``` + +#### Docstring Style + +We adhere to the [NumPy-style docstring format](https://numpydoc.readthedocs.io/en/latest/format.html). Please document all public methods and functions using this style. + +Consistent documentation ensures the code is easy to understand and maintain. 
+ +Thank you for your contributions and for helping make the Ersilia Model Hub a better project! + ### Submit a New Model If you want to incorporate a new model in the platform, open a new issue using the [model request template](https://github.com/ersilia-os/ersilia/issues/new?assignees=&labels=new-model&template=model_request.yml&title=%F0%9F%A6%A0+Model+Request%3A+%3Cname%3E) or contact us using the following [form](https://www.ersilia.io/request-model). diff --git a/conftest.py b/conftest.py index 96878baef..cea0c609a 100644 --- a/conftest.py +++ b/conftest.py @@ -1,9 +1,10 @@ -from test.playground.shared import results -from rich.table import Table -from rich.console import Console -from rich.text import Text from rich import box +from rich.console import Console from rich.panel import Panel +from rich.table import Table +from rich.text import Text + +from test.playground.shared import results def pytest_terminal_summary(terminalreporter, exitstatus, config): diff --git a/docs/conf.py b/docs/conf.py index 687122d4a..8769d06cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,9 +1,32 @@ +# -*- coding: utf-8 -*- +# +# Configuration file for the Sphinx documentation builder. +# +# This file does only contain a selection of the most common options. For a +# full list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- import datetime project = "Ersilia" -copyright = f"{datetime.datetime.now().year}, Ersilia Open Source Initiative" -author = "Miquel Duran-Frigola and Abel Legese" +now = datetime.datetime.now() +copyright = "{0}, Ersilia Open Source Initiative".format(now.year) +author = "Miquel Duran-Frigola" + +# The short X.Y version version = "" release = "" diff --git a/ersilia/__init__.py b/ersilia/__init__.py index 63852d59f..e1ae49a67 100644 --- a/ersilia/__init__.py +++ b/ersilia/__init__.py @@ -1,4 +1,5 @@ # External imports +# ruff: noqa import os from ._version import __version__ import warnings diff --git a/ersilia/_version.py b/ersilia/_version.py index 90c4dfe99..3767f9a28 100644 --- a/ersilia/_version.py +++ b/ersilia/_version.py @@ -1,6 +1,6 @@ -from urllib.request import urlopen import json import os +from urllib.request import urlopen STATIC_VERSION_FILE = "_static_version.py" PACKAGE_NAME = "ersilia" diff --git a/ersilia/auth/auth.py b/ersilia/auth/auth.py index f83da77d5..a37b6cef3 100644 --- a/ersilia/auth/auth.py +++ b/ersilia/auth/auth.py @@ -1,16 +1,12 @@ -""" -Deal with privileges in Ersilia. -Base on GitHub login. -""" - -from pathlib import Path import os +from pathlib import Path + import yaml try: from github import Github from github.GithubException import UnknownObjectException -except ModuleNotFoundError as err: +except ModuleNotFoundError: Github = None UnknownObjectException = None @@ -19,7 +15,11 @@ SECRET_REPO = "ersilia-os/ersilia-secrets" -class Auth(object): +class Auth: + """ + This class handles authentication. 
+ """ + def __init__(self): self.hosts_yml = os.path.join(str(Path.home()), ".config", "gh", "hosts.yml") if os.path.exists(self.hosts_yml): diff --git a/ersilia/cli/__init__.py b/ersilia/cli/__init__.py index 711999b04..8eb3e5c49 100644 --- a/ersilia/cli/__init__.py +++ b/ersilia/cli/__init__.py @@ -1,8 +1,10 @@ +from ..utils.session import create_session_dir from .create_cli import create_ersilia_cli from .echo import echo -from ..utils.session import create_session_dir cli = create_ersilia_cli() create_session_dir() if __name__ == "__main__": cli() + +__all__ = ["echo"] diff --git a/ersilia/cli/cmd.py b/ersilia/cli/cmd.py index 85052d689..1b863032f 100644 --- a/ersilia/cli/cmd.py +++ b/ersilia/cli/cmd.py @@ -10,53 +10,106 @@ def __init__(self): pass def auth(self): + """ + Authenticate the user. + """ m = importlib.import_module("ersilia.cli.commands.auth") m.auth_cmd() def catalog(self): + """ + Display the catalog. + """ m = importlib.import_module("ersilia.cli.commands.catalog") m.catalog_cmd() def uninstall(self): + """ + Uninstall the application. + """ m = importlib.import_module("ersilia.cli.commands.uninstall") m.uninstall_cmd() def close(self): + """ + Close the application. + """ m = importlib.import_module("ersilia.cli.commands.close") m.close_cmd() def delete(self): + """ + Delete the application. + """ m = importlib.import_module("ersilia.cli.commands.delete") m.delete_cmd() def example(self): + """ + Show an example. + """ m = importlib.import_module("ersilia.cli.commands.example") m.example_cmd() def info(self): + """ + Display information. + """ m = importlib.import_module("ersilia.cli.commands.info") m.info_cmd() def fetch(self): + """ + Fetch data. + """ m = importlib.import_module("ersilia.cli.commands.fetch") m.fetch_cmd() def publish(self): + """ + Publish data. + """ m = importlib.import_module("ersilia.cli.commands.publish") m.publish_cmd() def run(self): + """ + Execute the command. 
+ """ m = importlib.import_module("ersilia.cli.commands.run") m.run_cmd() + def stop(self): + """ + Stop the command. + """ + m = importlib.import_module("ersilia.cli.commands.stop") + m.stop_cmd() + + def restart(self): + """ + Restart the command. + """ + m = importlib.import_module("ersilia.cli.commands.restart") + m.restart_cmd() + def serve(self): + """ + Serve the application. + """ m = importlib.import_module("ersilia.cli.commands.serve") m.serve_cmd() def setup(self): + """ + Set up the application. + """ m = importlib.import_module("ersilia.cli.commands.setup") m.setup_cmd() def test(self): + """ + Test the application. + """ m = importlib.import_module("ersilia.cli.commands.test") m.test_cmd() diff --git a/ersilia/cli/commands/__init__.py b/ersilia/cli/commands/__init__.py index a02400555..2f63f6c83 100644 --- a/ersilia/cli/commands/__init__.py +++ b/ersilia/cli/commands/__init__.py @@ -1,15 +1,24 @@ -import click import functools -from ... import __version__ -from ... import logger + +import click + +from ... import __version__, logger from ..echo import Silencer class ErsiliaCommandGroup(click.Group): + """ + Command group for Ersilia CLI commands. + """ + NUMBER_OF_COMMON_PARAMS = 2 @staticmethod def bentoml_common_params(func): + """ + Add common parameters to the command. + """ + @click.option( "-q", "--quiet", @@ -24,6 +33,10 @@ def wrapper(quiet, *args, **kwargs): return wrapper def command(self, *args, **kwargs): + """ + Register a new command with common parameters. + """ + def wrapper(func): func = ErsiliaCommandGroup.bentoml_common_params(func) func.__click_params__ = ( diff --git a/ersilia/cli/commands/catalog.py b/ersilia/cli/commands/catalog.py index 04a9c8717..eb32ad305 100644 --- a/ersilia/cli/commands/catalog.py +++ b/ersilia/cli/commands/catalog.py @@ -1,8 +1,8 @@ import click -from . 
import ersilia_cli -from ...hub.content.catalog import ModelCatalog from ...hub.content.card import ModelCard +from ...hub.content.catalog import ModelCatalog +from . import ersilia_cli def catalog_cmd(): diff --git a/ersilia/cli/commands/close.py b/ersilia/cli/commands/close.py index 79b2be96e..a5bc7a97f 100644 --- a/ersilia/cli/commands/close.py +++ b/ersilia/cli/commands/close.py @@ -1,15 +1,13 @@ -import datetime -import os -from . import ersilia_cli -from .. import echo from ... import ErsiliaModel from ...core.session import Session from ...utils.session import deregister_model_session +from .. import echo +from . import ersilia_cli def close_cmd(): """ - Closes the current session. + Closes the current session of the served model. This command allows users to close the current session and clean up any resources. diff --git a/ersilia/cli/commands/delete.py b/ersilia/cli/commands/delete.py index 2d729eff8..a86d55836 100644 --- a/ersilia/cli/commands/delete.py +++ b/ersilia/cli/commands/delete.py @@ -1,13 +1,10 @@ import click -import csv +from ... import ModelBase from ...hub.content.catalog import ModelCatalog - - -from . import ersilia_cli -from .. import echo from ...hub.delete.delete import ModelFullDeleter -from ... import ModelBase +from .. import echo +from . import ersilia_cli def delete_cmd(): diff --git a/ersilia/cli/commands/example.py b/ersilia/cli/commands/example.py index 2377c6689..44efb5ef1 100644 --- a/ersilia/cli/commands/example.py +++ b/ersilia/cli/commands/example.py @@ -1,11 +1,12 @@ -import click import json -from . import ersilia_cli -from .. import echo -from ...io.input import ExampleGenerator -from ...core.session import Session +import click + from ... import ModelBase +from ...core.session import Session +from ...io.input import ExampleGenerator +from .. import echo +from . 
import ersilia_cli def example_cmd(): diff --git a/ersilia/cli/commands/fetch.py b/ersilia/cli/commands/fetch.py index 292ac2a5f..af731ec80 100644 --- a/ersilia/cli/commands/fetch.py +++ b/ersilia/cli/commands/fetch.py @@ -1,10 +1,12 @@ -import click import asyncio + +import click import nest_asyncio -from . import ersilia_cli -from .. import echo -from ...hub.fetch.fetch import ModelFetcher + from ... import ModelBase +from ...hub.fetch.fetch import ModelFetcher +from .. import echo +from . import ersilia_cli nest_asyncio.apply() diff --git a/ersilia/cli/commands/info.py b/ersilia/cli/commands/info.py index 373605625..ba106c6dc 100644 --- a/ersilia/cli/commands/info.py +++ b/ersilia/cli/commands/info.py @@ -1,10 +1,12 @@ import json + import click -from . import ersilia_cli -from .. import echo + from ... import ErsiliaModel from ...core.session import Session from ...hub.content.information import InformationDisplayer +from .. import echo +from . import ersilia_cli def info_cmd(): diff --git a/ersilia/cli/commands/publish.py b/ersilia/cli/commands/publish.py index 1edcbda76..b8a8649a1 100644 --- a/ersilia/cli/commands/publish.py +++ b/ersilia/cli/commands/publish.py @@ -1,10 +1,10 @@ import click -from . import ersilia_cli -from .. import echo -from ...publish.publish import ModelPublisher -from ...publish.lake import LakeStorer from ... import ModelBase +from ...publish.lake import LakeStorer +from ...publish.publish import ModelPublisher +from .. import echo +from . import ersilia_cli def publish_cmd(): diff --git a/ersilia/cli/commands/run.py b/ersilia/cli/commands/run.py index 606244610..25d110a17 100644 --- a/ersilia/cli/commands/run.py +++ b/ersilia/cli/commands/run.py @@ -1,14 +1,13 @@ -import click import json import types -import time -from . import ersilia_cli -from .. import echo +import click + from ... 
import ErsiliaModel from ...core.session import Session -from ...core.tracking import RunTracker from ...utils.terminal import print_result_table +from .. import echo +from . import ersilia_cli def run_cmd(): diff --git a/ersilia/cli/commands/serve.py b/ersilia/cli/commands/serve.py index 2d106be35..52f0da246 100644 --- a/ersilia/cli/commands/serve.py +++ b/ersilia/cli/commands/serve.py @@ -1,11 +1,11 @@ import click -from .. import echo -from . import ersilia_cli from ... import ErsiliaModel -from ..messages import ModelNotFound -from ...store.utils import OutputSource, ModelNotInStore, store_has_model +from ...store.utils import ModelNotInStore, OutputSource, store_has_model from ...utils.session import register_model_session +from .. import echo +from ..messages import ModelNotFound +from . import ersilia_cli def serve_cmd(): diff --git a/ersilia/cli/commands/setup.py b/ersilia/cli/commands/setup.py index 0317fb6be..7a11654a9 100644 --- a/ersilia/cli/commands/setup.py +++ b/ersilia/cli/commands/setup.py @@ -1,7 +1,7 @@ import click -from . import ersilia_cli from ...utils.installers import base_installer, full_installer +from . import ersilia_cli def setup_cmd(): diff --git a/ersilia/cli/commands/test.py b/ersilia/cli/commands/test.py index 73bca0208..9bc58e966 100644 --- a/ersilia/cli/commands/test.py +++ b/ersilia/cli/commands/test.py @@ -1,7 +1,8 @@ import click + from ...cli import echo -from . import ersilia_cli from ...publish.test import ModelTester +from . import ersilia_cli def test_cmd(): diff --git a/ersilia/cli/commands/uninstall.py b/ersilia/cli/commands/uninstall.py index 55a5fbd5b..e5bc9cfc0 100644 --- a/ersilia/cli/commands/uninstall.py +++ b/ersilia/cli/commands/uninstall.py @@ -1,5 +1,5 @@ -from . import ersilia_cli from ...utils.uninstall import Uninstaller +from . 
import ersilia_cli def uninstall_cmd(): diff --git a/ersilia/cli/echo.py b/ersilia/cli/echo.py index 61febb6af..87eb6541d 100644 --- a/ersilia/cli/echo.py +++ b/ersilia/cli/echo.py @@ -2,9 +2,11 @@ import emoji except: emoji = None -import click -import os import json +import os + +import click + from ..default import SILENCE_FILE from ..utils.session import get_session_dir diff --git a/ersilia/cli/messages.py b/ersilia/cli/messages.py index f6428e5fe..d321a5378 100644 --- a/ersilia/cli/messages.py +++ b/ersilia/cli/messages.py @@ -1,7 +1,8 @@ -from .echo import echo -from ..default import ERSILIA_MODEL_HUB_URL import sys +from ..default import ERSILIA_MODEL_HUB_URL +from .echo import echo + class ModelNotFound(object): """ diff --git a/ersilia/core/base.py b/ersilia/core/base.py index 45f8e766d..260cc1fbe 100644 --- a/ersilia/core/base.py +++ b/ersilia/core/base.py @@ -1,10 +1,11 @@ import os import subprocess from pathlib import Path + +from .. import logger +from ..default import EOS from ..utils.config import Config, Credentials from ..utils.paths import resolve_pack_method -from ..default import EOS -from .. import logger home = str(Path.home()) diff --git a/ersilia/core/model.py b/ersilia/core/model.py index 6be80b310..5514b29f1 100644 --- a/ersilia/core/model.py +++ b/ersilia/core/model.py @@ -1,43 +1,50 @@ -import os +import asyncio +import collections import csv import json +import os +import sys import time import types -import asyncio -import collections -import sys from click import secho as echo # Style-aware echo from .. 
import logger -from ..serve.api import Api -from .session import Session +from ..default import ( + APIS_LIST_FILE, + CARD_FILE, + DEFAULT_BATCH_SIZE, + EOS, + FETCHED_MODELS_FILENAME, + INFORMATION_FILE, + MODEL_SIZE_FILE, +) from ..hub.fetch.fetch import ModelFetcher -from .base import ErsiliaBase +from ..io.input import BaseIOGetter, ExampleGenerator +from ..io.output import TabularOutputStacker +from ..io.readers.file import FileTyper, TabularFileReader from ..lake.base import LakeBase -from ..utils import tmp_pid_file -from .modelbase import ModelBase -from ..serve.schema import ApiSchema -from ..utils.hdf5 import Hdf5DataLoader -from ..utils.csvfile import CsvDataLoader -from ..utils.terminal import yes_no_input -from ..utils.docker import ContainerMetricsSampler +from ..serve.api import Api from ..serve.autoservice import AutoService, PulledDockerImageService -from ..io.output import TabularOutputStacker +from ..serve.schema import ApiSchema from ..serve.standard_api import StandardCSVRunApi -from ..io.input import ExampleGenerator, BaseIOGetter -from .tracking import RunTracker -from ..io.readers.file import FileTyper, TabularFileReader -from ..store.api import InferenceStoreApi -from ..store.utils import OutputSource -from ..utils.exceptions_utils.api_exceptions import ApiSpecifiedOutputError -from ..default import FETCHED_MODELS_FILENAME, MODEL_SIZE_FILE, CARD_FILE, EOS -from ..default import DEFAULT_BATCH_SIZE, APIS_LIST_FILE, INFORMATION_FILE -from ..utils.logging import make_temp_dir from ..setup.requirements.compound import ( ChemblWebResourceClientRequirement, RdkitRequirement, ) +from ..store.api import InferenceStoreApi +from ..store.utils import OutputSource +from ..utils import tmp_pid_file +from ..utils.csvfile import CsvDataLoader +from ..utils.docker import ContainerMetricsSampler +from ..utils.exceptions_utils.api_exceptions import ApiSpecifiedOutputError +from ..utils.hdf5 import Hdf5DataLoader +from ..utils.logging import make_temp_dir 
+from ..utils.terminal import yes_no_input +from .base import ErsiliaBase +from .modelbase import ModelBase +from .session import Session +from .tracking import RunTracker try: import pandas as pd @@ -96,7 +103,10 @@ class ErsiliaModel(ErsiliaBase): .. code-block:: python model = ErsiliaModel(model="model_id") - result = model.run(input="input_data.csv", output="output_data.csv") + result = model.run( + input="input_data.csv", + output="output_data.csv", + ) Closing a model: diff --git a/ersilia/core/modelbase.py b/ersilia/core/modelbase.py index 04bdf3add..2c1c49c9e 100644 --- a/ersilia/core/modelbase.py +++ b/ersilia/core/modelbase.py @@ -1,13 +1,12 @@ -import os import json +import os -from .. import ErsiliaBase -from ..hub.content.slug import Slug -from ..hub.fetch import STATUS_FILE, DONE_TAG +from .. import ErsiliaBase, throw_ersilia_exception from ..default import IS_FETCHED_FROM_DOCKERHUB_FILE -from ..utils.paths import get_metadata_from_base_dir +from ..hub.content.slug import Slug +from ..hub.fetch import DONE_TAG, STATUS_FILE from ..utils.exceptions_utils.exceptions import InvalidModelIdentifierError -from .. 
import throw_ersilia_exception +from ..utils.paths import get_metadata_from_base_dir class ModelBase(ErsiliaBase): diff --git a/ersilia/core/session.py b/ersilia/core/session.py index 95cf0f695..3fe78a382 100644 --- a/ersilia/core/session.py +++ b/ersilia/core/session.py @@ -1,12 +1,10 @@ -import os -import csv import json +import os import time import uuid -import shutil -from ..utils.session import get_session_dir from ..default import SESSION_JSON +from ..utils.session import get_session_dir from .base import ErsiliaBase diff --git a/ersilia/core/tracking.py b/ersilia/core/tracking.py index 3fd16fb75..ba80e7edf 100644 --- a/ersilia/core/tracking.py +++ b/ersilia/core/tracking.py @@ -1,33 +1,32 @@ +import copy +import csv +import json import os import re +import resource import sys -import csv -import json -import copy -import boto3 -import psutil -from loguru import logger as logging -import requests import tempfile import types -import resource -from .session import Session from datetime import datetime -from datetime import timedelta -from .base import ErsiliaBase -from ..utils.docker import SimpleDocker -from ..utils.session import get_session_dir, get_session_uuid + +import boto3 +import psutil +import requests +from botocore.exceptions import ClientError, NoCredentialsError +from loguru import logger as logging + +from ..default import SESSION_JSON +from ..io.output_logger import TabularResultLogger from ..utils.csvfile import CsvDataLoader +from ..utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception +from ..utils.session import get_session_dir, get_session_uuid from ..utils.tracking import ( + RUN_DATA_STUB, init_tracking_summary, update_tracking_summary, - RUN_DATA_STUB, ) -from ..utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception -from ..default import SESSION_JSON -from ..io.output_logger import TabularResultLogger -from botocore.exceptions import ClientError, NoCredentialsError - +from .base import 
ErsiliaBase +from .session import Session AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY") @@ -452,9 +451,10 @@ def check_types(self, result, metadata): dtypes_list[key].add(type(value).__name__) mismatched_types = 0 - for column, types in dtypes_list.items(): + for column, dtype_set in dtypes_list.items(): if not all( - type_dict.get(dtype) == metadata["Output Type"][0] for dtype in types + type_dict.get(dtype) == metadata["Output Type"][0] + for dtype in dtype_set ): mismatched_types += 1 diff --git a/ersilia/db/disk/fetched.py b/ersilia/db/disk/fetched.py index 413809538..b4063b867 100644 --- a/ersilia/db/disk/fetched.py +++ b/ersilia/db/disk/fetched.py @@ -1,8 +1,8 @@ -import os import csv +import os from ... import ErsiliaBase -from ...default import FETCHED_MODELS_FILENAME, EOS +from ...default import EOS, FETCHED_MODELS_FILENAME class FetchedModelsManager(ErsiliaBase): diff --git a/ersilia/db/environments/localdb.py b/ersilia/db/environments/localdb.py index 6eac1359b..7ac425922 100644 --- a/ersilia/db/environments/localdb.py +++ b/ersilia/db/environments/localdb.py @@ -1,5 +1,6 @@ -import sqlite3 import os +import sqlite3 + from ... import ErsiliaBase ENVIRONMENTDB_FILE = ".environment.db" @@ -21,8 +22,8 @@ class EnvironmentDb(ErsiliaBase): Examples -------- >>> env_db = EnvironmentDb(config_json) - >>> env_db.table = 'conda' - >>> env_db.insert('model_id', 'venv_name') + >>> env_db.table = "conda" + >>> env_db.insert("model_id", "venv_name") """ def __init__(self, config_json=None): @@ -66,6 +67,9 @@ def _connect(self): return sqlite3.connect(self.file_path) def create_table(self): + """ + Create table if it does not exist. 
+ """ if self._table is None: return sql = """ diff --git a/ersilia/db/environments/managers.py b/ersilia/db/environments/managers.py index 7dd41c85c..592a9e07c 100644 --- a/ersilia/db/environments/managers.py +++ b/ersilia/db/environments/managers.py @@ -1,22 +1,20 @@ import os -import tempfile import re import shutil +import sys from ...core.base import ErsiliaBase +from ...default import DOCKERHUB_LATEST_TAG, DOCKERHUB_ORG from ...setup.requirements.docker import DockerRequirement -from ...utils.paths import Paths -from ...utils.terminal import run_command from ...utils.docker import SimpleDocker, resolve_platform -from ...utils.system import is_inside_docker from ...utils.identifiers.short import ShortIdentifier +from ...utils.logging import make_temp_dir +from ...utils.paths import Paths from ...utils.ports import find_free_port -from .localdb import EnvironmentDb -from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG from ...utils.session import get_session_dir -from ...utils.logging import make_temp_dir - -import sys +from ...utils.system import is_inside_docker +from ...utils.terminal import run_command +from .localdb import EnvironmentDb BENTOML_DOCKERPORT = 5000 INTERNAL_DOCKERPORT = 80 @@ -40,9 +38,15 @@ class DockerManager(ErsiliaBase): Examples -------- - >>> docker_manager = DockerManager(config_json=config, preferred_port=8080) - >>> docker_manager.build(model_id='eosxxxx', docker_user='user', docker_pwd='pass') - >>> docker_manager.run(model_id='eosxxxx', workers=2) + >>> docker_manager = DockerManager( + ... config_json=config, preferred_port=8080 + ... ) + >>> docker_manager.build( + ... model_id="eosxxxx", + ... docker_user="user", + ... docker_pwd="pass", + ... 
) + >>> docker_manager.run(model_id="eosxxxx", workers=2) """ def __init__(self, config_json=None, preferred_port=None, with_bentoml=False): @@ -486,6 +490,9 @@ def delete_containers(self, model_id): self._delete_container(k) def delete_image(self, model_id): + """ + Deletes a Docker image associated with a model. + """ self.remove(model_id) def remove_stopped_containers(self): @@ -590,7 +597,7 @@ def prune(self): cmd = "docker system prune -f" run_command(cmd) - def delete_image(self, img): + def delete_image(self, img): # noqa: D102, F811 fn = os.path.join(get_session_dir(), "rm_image_output.txt") cmd = "docker image rm {0} --force 2> {1}".format(img, fn) run_command(cmd) @@ -652,10 +659,9 @@ def delete_images(self, model_id, purge_unnamed=True): self.delete_image(img) -class CondaManager(object): - +class CondaManager(object): # noqa: D101 def __init__(self): pass - def environments(self): + def environments(self): # noqa: D102 pass diff --git a/ersilia/db/hubdata/localslugs.py b/ersilia/db/hubdata/localslugs.py index 64ba73de8..72d360788 100644 --- a/ersilia/db/hubdata/localslugs.py +++ b/ersilia/db/hubdata/localslugs.py @@ -1,5 +1,6 @@ -import sqlite3 import os +import sqlite3 + from ... import ErsiliaBase SLUGDB_FILE = ".slug.db" @@ -25,6 +26,9 @@ def _connect(self): return sqlite3.connect(self.file_path) def create_table(self): + """ + Creates the slugs table in the database if it does not exist. + """ if self._table is None: return sql = """ diff --git a/ersilia/db/hubdata/sanitize.py b/ersilia/db/hubdata/sanitize.py index e72b61616..1530d1ed1 100644 --- a/ersilia/db/hubdata/sanitize.py +++ b/ersilia/db/hubdata/sanitize.py @@ -1,10 +1,8 @@ import validators -from .interfaces import AirtableInterface +from ... import ErsiliaBase, throw_ersilia_exception from ...utils.exceptions_utils.hubdata_exceptions import InvalidUrlInAirtableError - -from ... import ErsiliaBase -from ... 
import throw_ersilia_exception +from .interfaces import AirtableInterface # Potentially related issue: https://github.com/ersilia-os/ersilia/issues/1407 diff --git a/ersilia/db/hubdata/tables.py b/ersilia/db/hubdata/tables.py index e09a57fde..3a50bf9a2 100644 --- a/ersilia/db/hubdata/tables.py +++ b/ersilia/db/hubdata/tables.py @@ -1,16 +1,25 @@ -import boto3 - - class DynamoDbTable(object): + """ + Base class for DynamoDB tables. + """ + def __init__(self): pass class PredictionsTable(DynamoDbTable): + """ + Table for storing predictions. + """ + def __init__(self): pass class ModelsTable(DynamoDbTable): + """ + Table for storing models. + """ + def __init__(self): pass diff --git a/ersilia/default.py b/ersilia/default.py index 57fa4b765..bcf0fe2fd 100644 --- a/ersilia/default.py +++ b/ersilia/default.py @@ -1,7 +1,9 @@ -from pathlib import Path -import shutil import os +import shutil from enum import Enum +from pathlib import Path + +# ruff: noqa: D101, D102 # EOS environmental variables EOS = os.path.join(str(Path.home()), "eos") @@ -85,7 +87,7 @@ ISAURA_GDRIVE = "1LSCMHrCuXUDNH3WRbrLMW2FoiwMCxF2n" ISAURA_TEAM_GDRIVE = "0AG4WDDaU_00XUk9PVA" ISAURA_DIR = os.path.join(EOS, "isaura", "lake") - +REDIS_SERVER = "redis://127.0.0.1:6379" # Other FEATURE_MERGE_PATTERN = "---" diff --git a/ersilia/hub/bundle/bundle.py b/ersilia/hub/bundle/bundle.py index 8575367b1..be0f66dd8 100644 --- a/ersilia/hub/bundle/bundle.py +++ b/ersilia/hub/bundle/bundle.py @@ -1,11 +1,13 @@ +import collections import os + import yaml -import collections +from dockerfile_parse import DockerfileParser + from ...core.base import ErsiliaBase from ...default import CONDA_ENV_YML_FILE, DOCKERFILE_FILE from ...hub.fetch import MODEL_INSTALL_COMMANDS_FILE, REQUIREMENTS_TXT from .repo import DockerfileFile -from dockerfile_parse import DockerfileParser class BundleEnvironmentFile(ErsiliaBase): diff --git a/ersilia/hub/bundle/repo.py b/ersilia/hub/bundle/repo.py index f8ecbc4c1..e1fa08cf6 100644 --- 
a/ersilia/hub/bundle/repo.py +++ b/ersilia/hub/bundle/repo.py @@ -1,18 +1,17 @@ -import os import json -from re import I -from ... import ErsiliaBase -from ... import logger -from ...utils.paths import Paths -from ...utils.docker import SimpleDockerfileParser -from ...utils.conda import SimpleConda -from ...utils.system import SystemChecker +import os + +from ... import ErsiliaBase, logger from ...default import ( CONDA_ENV_YML_FILE, - DOCKER_BENTO_PATH, DEFAULT_MODEL_ID, + DOCKER_BENTO_PATH, DOCKERFILE_FILE, ) +from ...utils.conda import SimpleConda +from ...utils.docker import SimpleDockerfileParser +from ...utils.paths import Paths +from ...utils.system import SystemChecker ROOT_CHECKFILE = "README.md" @@ -321,6 +320,19 @@ def needs_conda(self) -> bool: return False def get_install_commands_from_dockerfile(self, fn): + """ + Get the install commands from the Dockerfile. + + Parameters + ---------- + fn : str + The path to the Dockerfile. + + Returns + ------- + list + The list of RUN commands from the Dockerfile. + """ dp = SimpleDockerfileParser(fn) runs = dp.get_runs() return runs diff --git a/ersilia/hub/bundle/status.py b/ersilia/hub/bundle/status.py index 995602d38..143562c83 100644 --- a/ersilia/hub/bundle/status.py +++ b/ersilia/hub/bundle/status.py @@ -1,12 +1,12 @@ -import os import importlib import json +import os -from ...utils.docker import SimpleDocker -from ...utils.conda import SimpleConda -from ...db.environments.localdb import EnvironmentDb from ... 
import ErsiliaBase +from ...db.environments.localdb import EnvironmentDb from ...default import IS_FETCHED_FROM_DOCKERHUB_FILE +from ...utils.conda import SimpleConda +from ...utils.docker import SimpleDocker class ModelStatus(ErsiliaBase): diff --git a/ersilia/hub/content/base_information.py b/ersilia/hub/content/base_information.py index b71574654..55981d5b7 100644 --- a/ersilia/hub/content/base_information.py +++ b/ersilia/hub/content/base_information.py @@ -1,5 +1,6 @@ -import os import datetime +import os + import validators try: @@ -8,44 +9,53 @@ from validators import ValidationError as ValidationFailure +from ... import ErsiliaBase from ...utils.exceptions_utils.base_information_exceptions import ( - SlugBaseInformationError, - IdentifierBaseInformationError, - StatusBaseInformationError, - TitleBaseInformationError, + BiomedicalAreaBaseInformationError, + BothIdentifiersBaseInformationError, DescriptionBaseInformationError, - ModeBaseInformationError, - SourceBaseInformationError, - SourceTypeBaseInformationError, + DockerArchitectureBaseInformationError, + DockerhubBaseInformationError, + GithubBaseInformationError, + IdentifierBaseInformationError, InputBaseInformationError, InputShapeBaseInformationError, + LicenseBaseInformationError, + MemoryGbBaseInformationError, + ModeBaseInformationError, OutputBaseInformationError, - OutputTypeBaseInformationError, - OutputShapeBaseInformationError, - OutputDimensionBaseInformationError, OutputConsistencyBaseInformationError, - TaskBaseInformationError, - SubtaskBaseInformationError, - BiomedicalAreaBaseInformationError, - TargetOrganismBaseInformationError, - TagBaseInformationError, + OutputDimensionBaseInformationError, + OutputShapeBaseInformationError, + OutputTypeBaseInformationError, PublicationBaseInformationError, PublicationTypeBaseInformationError, PublicationYearBaseInformationError, - SourceCodeBaseInformationError, - LicenseBaseInformationError, - GithubBaseInformationError, - 
DockerhubBaseInformationError, - DockerArchitectureBaseInformationError, S3BaseInformationError, - BothIdentifiersBaseInformationError, - MemoryGbBaseInformationError, + SlugBaseInformationError, + SourceBaseInformationError, + SourceCodeBaseInformationError, + SourceTypeBaseInformationError, + StatusBaseInformationError, + SubtaskBaseInformationError, + TagBaseInformationError, + TargetOrganismBaseInformationError, + TaskBaseInformationError, + TitleBaseInformationError, ) from ...utils.identifiers.model import ModelIdentifier -from ... import ErsiliaBase class BaseInformation(ErsiliaBase): + """ + Base class for handling and validating model information. + + Parameters + ---------- + config_json : dict + Configuration data in JSON format. + """ + def __init__(self, config_json): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) self._github = None @@ -90,10 +100,31 @@ def _read_default_fields(self, field): @property def identifier(self): + """ + Get the model identifier. + + Returns + ------- + str + The model identifier. + """ return self._identifier @identifier.setter def identifier(self, new_identifier): + """ + Set the model identifier. + + Parameters + ---------- + new_identifier : str + The new model identifier. + + Raises + ------ + IdentifierBaseInformationError + If the identifier is not valid. + """ mi = ModelIdentifier() if not mi.is_valid(new_identifier): raise IdentifierBaseInformationError @@ -101,10 +132,31 @@ def identifier(self, new_identifier): @property def slug(self): + """ + Get the model slug. + + Returns + ------- + str + The model slug. + """ return self._slug @slug.setter def slug(self, new_slug): + """ + Set the model slug. + + Parameters + ---------- + new_slug : str + The new model slug. + + Raises + ------ + SlugBaseInformationError + If the slug is not valid. 
+ """ if new_slug.lower() != new_slug: raise SlugBaseInformationError if len(new_slug) > 60: @@ -115,20 +167,62 @@ def slug(self, new_slug): @property def status(self): + """ + Get the model status. + + Returns + ------- + str + The model status. + """ return self._status @status.setter def status(self, new_status): + """ + Set the model status. + + Parameters + ---------- + new_status : str + The new model status. + + Raises + ------ + StatusBaseInformationError + If the status is not valid. + """ if new_status not in self._read_default_fields("Status"): raise StatusBaseInformationError self._status = new_status @property def title(self): + """ + Get the model title. + + Returns + ------- + str + The model title. + """ return self._title @title.setter def title(self, new_title): + """ + Set the model title. + + Parameters + ---------- + new_title : str + The new model title. + + Raises + ------ + TitleBaseInformationError + If the title is not valid. + """ if len(new_title) > 300: raise TitleBaseInformationError if len(new_title) < 10: @@ -137,10 +231,31 @@ def title(self, new_title): @property def description(self): + """ + Get the model description. + + Returns + ------- + str + The model description. + """ return self._description @description.setter def description(self, new_description): + """ + Set the model description. + + Parameters + ---------- + new_description : str + The new model description. + + Raises + ------ + DescriptionBaseInformationError + If the description is not valid. + """ if len(new_description) < 200: raise DescriptionBaseInformationError if new_description == self._title: @@ -149,40 +264,124 @@ def description(self, new_description): @property def mode(self): + """ + Get the model mode. + + Returns + ------- + str + The model mode. + """ return self._mode @mode.setter def mode(self, new_mode): + """ + Set the model mode. + + Parameters + ---------- + new_mode : str + The new model mode. 
+ + Raises + ------ + ModeBaseInformationError + If the mode is not valid. + """ if new_mode not in self._read_default_fields("Mode"): raise ModeBaseInformationError self._mode = new_mode @property def source(self): + """ + Get the model source. + + Returns + ------- + str + The model source. + """ return self._source @source.setter def source(self, new_source): + """ + Set the model source. + + Parameters + ---------- + new_source : str + The new model source. + + Raises + ------ + SourceBaseInformationError + If the source is not valid. + """ if new_source not in self._read_default_fields("Source"): raise SourceBaseInformationError self._source = new_source @property def source_type(self): + """ + Get the model source type. + + Returns + ------- + str + The model source type. + """ return self._source_type @source_type.setter def source_type(self, new_source_type): + """ + Set the model source type. + + Parameters + ---------- + new_source_type : str + The new model source type. + + Raises + ------ + SourceTypeBaseInformationError + If the source type is not valid. + """ if new_source_type not in self._read_default_fields("Source Type"): raise SourceTypeBaseInformationError self._source_type = new_source_type @property def input(self): + """ + Get the model input. + + Returns + ------- + list + The model input. + """ return self._input @input.setter def input(self, new_input): + """ + Set the model input. + + Parameters + ---------- + new_input : list or str + The new model input. + + Raises + ------ + InputBaseInformationError + If the input is not valid. + """ if type(new_input) is str: new_input = [new_input] if type(new_input) is not list: @@ -194,20 +393,62 @@ def input(self, new_input): @property def input_shape(self): + """ + Get the model input shape. + + Returns + ------- + str + The model input shape. + """ return self._input_shape @input_shape.setter def input_shape(self, new_input_shape): + """ + Set the model input shape. 
+ + Parameters + ---------- + new_input_shape : str + The new model input shape. + + Raises + ------ + InputShapeBaseInformationError + If the input shape is not valid. + """ if new_input_shape not in self._read_default_fields("Input Shape"): raise InputShapeBaseInformationError self._input_shape = new_input_shape @property def task(self): + """ + Get the model task. + + Returns + ------- + list + The model task. + """ return self._task @task.setter def task(self, new_task): + """ + Set the model task. + + Parameters + ---------- + new_task : list or str + The new model task. + + Raises + ------ + TaskBaseInformationError + If the task is not valid. + """ if type(new_task) is str: new_task = [new_task] if type(new_task) is not list: @@ -219,10 +460,31 @@ def task(self, new_task): @property def subtask(self): + """ + Get the model subtask. + + Returns + ------- + list + The model subtask. + """ return self._subtask @subtask.setter def subtask(self, new_subtask): + """ + Set the model subtask. + + Parameters + ---------- + new_subtask : list or str + The new model subtask. + + Raises + ------ + SubtaskBaseInformationError + If the subtask is not valid. + """ if type(new_subtask) is str: new_subtask = [new_subtask] if type(new_subtask) is not list: @@ -234,10 +496,31 @@ def subtask(self, new_subtask): @property def biomedical_area(self): + """ + Get the model biomedical area. + + Returns + ------- + list + The model biomedical area. + """ return self._biomedical_area @biomedical_area.setter def biomedical_area(self, new_biomedical_area): + """ + Set the model biomedical area. + + Parameters + ---------- + new_biomedical_area : list or str + The new model biomedical area. + + Raises + ------ + BiomedicalAreaBaseInformationError + If the biomedical area is not valid. 
+ """ if type(new_biomedical_area) is str: new_biomedical_area = [new_biomedical_area] if type(new_biomedical_area) is not list: @@ -249,10 +532,31 @@ def biomedical_area(self, new_biomedical_area): @property def target_organism(self): + """ + Get the model target organism. + + Returns + ------- + list + The model target organism. + """ return self._target_organism @target_organism.setter def target_organism(self, new_target_organism): + """ + Set the model target organism. + + Parameters + ---------- + new_target_organism : list or str + The new model target organism. + + Raises + ------ + TargetOrganismBaseInformationError + If the target organism is not valid. + """ if type(new_target_organism) is str: new_target_organism = [new_target_organism] if type(new_target_organism) is not list: @@ -264,10 +568,31 @@ def target_organism(self, new_target_organism): @property def output(self): + """ + Get the model output. + + Returns + ------- + list + The model output. + """ return self._output @output.setter def output(self, new_output): + """ + Set the model output. + + Parameters + ---------- + new_output : list or str + The new model output. + + Raises + ------ + OutputBaseInformationError + If the output is not valid. + """ if type(new_output) is str: new_output = [new_output] default_output = self._read_default_fields("Output") @@ -278,10 +603,31 @@ def output(self, new_output): @property def output_type(self): + """ + Get the model output type. + + Returns + ------- + list + The model output type. + """ return self._output_type @output_type.setter def output_type(self, new_output_type): + """ + Set the model output type. + + Parameters + ---------- + new_output_type : list or str + The new model output type. + + Raises + ------ + OutputTypeBaseInformationError + If the output type is not valid. 
+ """ if type(new_output_type) is str: new_output_type = [new_output_type] default_output_type = self._read_default_fields("Output Type") @@ -292,10 +638,31 @@ def output_type(self, new_output_type): @property def output_shape(self): + """ + Get the model output shape. + + Returns + ------- + str + The model output shape. + """ return self._output_shape @output_shape.setter def output_shape(self, new_output_shape): + """ + Set the model output shape. + + Parameters + ---------- + new_output_shape : str + The new model output shape. + + Raises + ------ + OutputShapeBaseInformationError + If the output shape is not valid. + """ default_output_shape = self._read_default_fields("Output Shape") if new_output_shape not in default_output_shape: raise OutputShapeBaseInformationError @@ -303,10 +670,31 @@ def output_shape(self, new_output_shape): @property def output_dimension(self): + """ + Get the model output dimension. + + Returns + ------- + int + The model output dimension. + """ return self._output_dimension @output_dimension.setter def output_dimension(self, new_output_dimension): + """ + Set the model output dimension. + + Parameters + ---------- + new_output_dimension : int + The new model output dimension. + + Raises + ------ + OutputDimensionBaseInformationError + If the output dimension is not valid. + """ if type(new_output_dimension) is not int: raise OutputDimensionBaseInformationError if new_output_dimension < 1: @@ -315,10 +703,31 @@ def output_dimension(self, new_output_dimension): @property def output_consistency(self): + """ + Get the model output consistency. + + Returns + ------- + str + The model output consistency. + """ return self._output_consistency @output_consistency.setter def output_consistency(self, new_output_consistency): + """ + Set the model output consistency. + + Parameters + ---------- + new_output_consistency : str + The new model output consistency. 
+ + Raises + ------ + OutputConsistencyBaseInformationError + If the output consistency is not valid. + """ default_output_consistency = self._read_default_fields("Output Consistency") if new_output_consistency not in default_output_consistency: raise OutputConsistencyBaseInformationError @@ -326,18 +735,55 @@ def output_consistency(self, new_output_consistency): @property def interpretation(self): + """ + Get the model interpretation. + + Returns + ------- + str + The model interpretation. + """ return self._interpretation @interpretation.setter def interpretation(self, new_interpretation): + """ + Set the model interpretation. + + Parameters + ---------- + new_interpretation : str + The new model interpretation. + """ self._interpretation = new_interpretation @property def tag(self): + """ + Get the model tags. + + Returns + ------- + list + The model tags. + """ return self._tag @tag.setter def tag(self, new_tag): + """ + Set the model tags. + + Parameters + ---------- + new_tag : list or str + The new model tags. + + Raises + ------ + TagBaseInformationError + If the tags are not valid. + """ if type(new_tag) is str: new_tag = [new_tag] if type(new_tag) is not list: @@ -350,30 +796,93 @@ def tag(self, new_tag): @property def publication(self): + """ + Get the model publication URL. + + Returns + ------- + str + The model publication URL. + """ return self._publication @publication.setter def publication(self, new_publication): + """ + Set the model publication URL. + + Parameters + ---------- + new_publication : str + The new model publication URL. + + Raises + ------ + PublicationBaseInformationError + If the publication URL is not valid. + """ if not self._is_valid_url(new_publication): raise PublicationBaseInformationError self._publication = new_publication @property def publication_type(self): + """ + Get the model publication type. + + Returns + ------- + str + The model publication type. 
+ """ return self._publication_type @publication_type.setter def publication_type(self, new_publication_type): + """ + Set the model publication type. + + Parameters + ---------- + new_publication_type : str + The new model publication type. + + Raises + ------ + PublicationTypeBaseInformationError + If the publication type is not valid. + """ if new_publication_type not in self._read_default_fields("Publication Type"): raise PublicationTypeBaseInformationError self._publication_type = new_publication_type @property def publication_year(self): + """ + Get the model publication year. + + Returns + ------- + int + The model publication year. + """ return self._publication_year @publication_year.setter def publication_year(self, new_publication_year): + """ + Set the model publication year. + + Parameters + ---------- + new_publication_year : int + The new model publication year. + + Raises + ------ + PublicationYearBaseInformationError + If the publication year is not valid. + """ if type(new_publication_year) is not int: raise PublicationYearBaseInformationError if new_publication_year < 1900 or new_publication_year > datetime.today("Y"): @@ -382,42 +891,129 @@ def publication_year(self, new_publication_year): @property def source_code(self): + """ + Get the model source code URL. + + Returns + ------- + str + The model source code URL. + """ return self._source_code @source_code.setter def source_code(self, new_source_code): + """ + Set the model source code URL. + + Parameters + ---------- + new_source_code : str + The new model source code URL. + + Raises + ------ + SourceCodeBaseInformationError + If the source code URL is not valid. + """ if not self._is_valid_url(new_source_code): raise SourceCodeBaseInformationError self._source_code = new_source_code @property def license(self): + """ + Get the model license. + + Returns + ------- + str + The model license. 
+ """ return self._license @license.setter def license(self, new_license): + """ + Set the model license. + + Parameters + ---------- + new_license : str + The new model license. + + Raises + ------ + LicenseBaseInformationError + If the license is not valid. + """ if new_license not in self._read_default_fields("License"): raise LicenseBaseInformationError self._license = new_license @property def date(self): + """ + Get the model date. + + Returns + ------- + str + The model date. + """ return self._date @date.setter def date(self, new_date): + """ + Set the model date. + + Parameters + ---------- + new_date : str + The new model date. + """ self._date = new_date @property def contributor(self): + """ + Get the model contributor. + + Returns + ------- + str + The model contributor. + """ return self._contributor @contributor.setter def contributor(self, new_contributor): + """ + Set the model contributor. + + Parameters + ---------- + new_contributor : str + The new model contributor. + """ self._contributor = new_contributor @property def github(self): + """ + Get the model GitHub URL. + + Returns + ------- + str + The model GitHub URL. + + Raises + ------ + GithubBaseInformationError + If the identifier is not set. + """ model_id = self.identifier if model_id is None: raise GithubBaseInformationError @@ -426,20 +1022,62 @@ def github(self): @property def dockerhub(self): + """ + Get the model DockerHub URL. + + Returns + ------- + str + The model DockerHub URL. + """ return self._dockerhub @dockerhub.setter def dockerhub(self, new_dockerhub_url): + """ + Set the model DockerHub URL. + + Parameters + ---------- + new_dockerhub_url : str + The new model DockerHub URL. + + Raises + ------ + DockerhubBaseInformationError + If the DockerHub URL is not valid. 
+ """ if not new_dockerhub_url.startswith("https://hub.docker.com/r/ersiliaos/"): raise DockerhubBaseInformationError self._dockerhub = new_dockerhub_url @property def docker_architecture(self): + """ + Get the model Docker architecture. + + Returns + ------- + list + The model Docker architecture. + """ return self._docker_architecture @docker_architecture.setter def docker_architecture(self, new_docker_architecture): + """ + Set the model Docker architecture. + + Parameters + ---------- + new_docker_architecture : list or str + The new model Docker architecture. + + Raises + ------ + DockerArchitectureBaseInformationError + If the Docker architecture is not valid. + """ if type(new_docker_architecture) is str: new_docker_architecture = [new_docker_architecture] for d in new_docker_architecture: @@ -449,10 +1087,31 @@ def docker_architecture(self, new_docker_architecture): @property def s3(self): + """ + Get the model S3 URL. + + Returns + ------- + str + The model S3 URL. + """ return self._s3 @s3.setter def s3(self, new_s3_url): + """ + Set the model S3 URL. + + Parameters + ---------- + new_s3_url : str + The new model S3 URL. + + Raises + ------ + S3BaseInformationError + If the S3 URL is not valid. + """ if not new_s3_url.startswith( "https://ersilia-models-zipped.s3.eu-central-1.amazonaws.com/" ): @@ -461,6 +1120,19 @@ def s3(self, new_s3_url): @property def both_identifiers(self): + """ + Get both the model identifier and slug. + + Returns + ------- + tuple + The model identifier and slug. + + Raises + ------ + BothIdentifiersBaseInformationError + If either the identifier or slug is not set. + """ model_id = self.identifier slug = self.slug if model_id is None or slug is None: @@ -470,15 +1142,44 @@ def both_identifiers(self): @property def memory_gb(self): + """ + Get the model memory in GB. + + Returns + ------- + int + The model memory in GB. 
+ """ return self._memory_gb @memory_gb.setter def memory_gb(self, new_memory_gb): + """ + Set the model memory in GB. + + Parameters + ---------- + new_memory_gb : int + The new model memory in GB. + + Raises + ------ + MemoryGbBaseInformationError + If the memory value is not valid. + """ if type(new_memory_gb) != int: raise MemoryGbBaseInformationError self._memory_gb = new_memory_gb def as_dict(self): + """ + Convert the model information to a dictionary. + + Returns + ------- + dict + The model information as a dictionary. + """ data = { "Identifier": self.identifier, "Slug": self.slug, @@ -515,41 +1216,46 @@ def as_dict(self): data = dict((k, v) for k, v in data.items() if v is not None) return data - def _assign(self, var, key, data): - if key in data: - var = data[key] - else: - var = None + def _assign(self, attr_name, key, data): + setattr(self, attr_name, data[key] if key in data else None) def from_dict(self, data): - self._assign(self.identifier, "Identifier", data) - self._assign(self.slug, "Slug", data) - self._assign(self.status, "Status", data) - self._assign(self.title, "Title", data) - self._assign(self.description, "Description", data) - self._assign(self.mode, "Mode", data) - self._assign(self.source, "Source", data) - self._assign(self.source_type, "Source Type", data) - self._assign(self.input, "Input", data) - self._assign(self.input_shape, "Input Shape", data) - self._assign(self.task, "Task", data) - self._assign(self.subtask, "Subtask", data) - self._assign(self.biomedical_area, "Biomedical Area", data) - self._assign(self.target_organism, "Target Organism", data) - self._assign(self.output, "Output", data) - self._assign(self.output_type, "Output Type", data) - self._assign(self.output_shape, "Output Shape", data) - self._assign(self.output_dimension, "Output Dimension", data) - self._assign(self.output_consistency, "Output Consistency", data) - self._assign(self.interpretation, "Interpretation", data) - self._assign(self.tag, "Tag", 
data) - self._assign(self.publication, "Publication", data) - self._assign(self.publication_type, "Publication Type", data) - self._assign(self.publication_year, "Publication Year", data) - self._assign(self.source_code, "Source Code", data) - self._assign(self.license, "License", data) - self._assign(self.contributor, "Contributor", data) - self._assign(self.dockerhub, "DockerHub", data) - self._assign(self.docker_architecture, "Docker Architecture", data) - self._assign(self.s3, "S3", data) - self._assign(self.memory_gb, "Memory Gb", data) + """ + Load the model information from a dictionary. + + Parameters + ---------- + data : dict + The model information as a dictionary. + """ + self._assign("identifier", "Identifier", data) + self._assign("slug", "Slug", data) + self._assign("status", "Status", data) + self._assign("title", "Title", data) + self._assign("description", "Description", data) + self._assign("mode", "Mode", data) + self._assign("source", "Source", data) + self._assign("source_type", "Source Type", data) + self._assign("input", "Input", data) + self._assign("input_shape", "Input Shape", data) + self._assign("task", "Task", data) + self._assign("subtask", "Subtask", data) + self._assign("biomedical_area", "Biomedical Area", data) + self._assign("target_organism", "Target Organism", data) + self._assign("output", "Output", data) + self._assign("output_type", "Output Type", data) + self._assign("output_shape", "Output Shape", data) + self._assign("output_dimension", "Output Dimension", data) + self._assign("output_consistency", "Output Consistency", data) + self._assign("interpretation", "Interpretation", data) + self._assign("tag", "Tag", data) + self._assign("publication", "Publication", data) + self._assign("publication_type", "Publication Type", data) + self._assign("publication_year", "Publication Year", data) + self._assign("source_code", "Source Code", data) + self._assign("license", "License", data) + self._assign("contributor", "Contributor", 
data) + self._assign("dockerhub", "DockerHub", data) + self._assign("docker_architecture", "Docker Architecture", data) + self._assign("s3", "S3", data) + self._assign("memory_gb", "Memory Gb", data) diff --git a/ersilia/hub/content/card.py b/ersilia/hub/content/card.py index 19a243fac..de7d3668b 100644 --- a/ersilia/hub/content/card.py +++ b/ersilia/hub/content/card.py @@ -1,14 +1,15 @@ -import os import json +import os + import requests import yaml -from .base_information import BaseInformation from ... import ErsiliaBase -from ...utils.terminal import run_command from ...auth.auth import Auth from ...db.hubdata.interfaces import JsonModelsInterface from ...utils.logging import make_temp_dir +from ...utils.terminal import run_command +from .base_information import BaseInformation try: from isaura.core.hdf5 import Hdf5Explorer @@ -17,8 +18,8 @@ from ...default import ( CARD_FILE, - METADATA_JSON_FILE, INFORMATION_FILE, + METADATA_JSON_FILE, METADATA_YAML_FILE, ) from ...utils.paths import get_metadata_from_base_dir @@ -257,6 +258,14 @@ def _model_github_url(self, model_id): return "https://github.com/ersilia-os/{0}".format(model_id) def parse(self, model_id): + """ + Parse the model information from the README file. + + Parameters + ---------- + model_id : str + The model identifier. + """ readme = os.path.join(self._dest_dir, model_id, "README.md") if os.path.exists(readme): with open(readme, "r") as f: @@ -428,12 +437,38 @@ def __init__(self, config_json=None): JsonModelsInterface.__init__(self, config_json=config_json) def get_card_by_model_id(self, model_id): + """ + Get the card information by model identifier. + + Parameters + ---------- + model_id : str + The model identifier. + + Returns + ------- + dict + The card information. + """ all_models = self.items_all() for model in all_models: if model["Identifier"] == model_id: return model def get_card_by_slug(self, slug): + """ + Get the card information by model slug. 
+ + Parameters + ---------- + slug : str + The model slug. + + Returns + ------- + dict + The card information. + """ all_models = self.items_all() for model in all_models: if model["Slug"] == slug: diff --git a/ersilia/hub/content/catalog.py b/ersilia/hub/content/catalog.py index 5ed5d0f3f..a60dcd4f4 100644 --- a/ersilia/hub/content/catalog.py +++ b/ersilia/hub/content/catalog.py @@ -1,27 +1,25 @@ """See available models in the Ersilia Model Hub""" -import subprocess -import requests -import shutil -import os -import json import csv -from .card import ModelCard +import json +import os +import shutil +import subprocess + from ... import ErsiliaBase -from ...utils.identifiers.model import ModelIdentifier from ...db.hubdata.interfaces import JsonModelsInterface -from ...default import BENTOML_PATH, MODEL_SOURCE_FILE -from ...default import TableConstants -from ... import logger +from ...default import BENTOML_PATH, MODEL_SOURCE_FILE, TableConstants +from ...utils.identifiers.model import ModelIdentifier +from .card import ModelCard try: import webbrowser -except ModuleNotFoundError as err: +except ModuleNotFoundError: webbrowser = None try: from github import Github -except ModuleNotFoundError as err: +except ModuleNotFoundError: Github = None @@ -212,6 +210,20 @@ def __repr__(self): class ModelCatalog(ErsiliaBase): + """ + Class to handle the model catalog. + + This class provides methods to manage the model catalog, including adding, updating, + and retrieving models. + + Attributes + ---------- + LESS_FIELDS : list + List of fields with less information. + MORE_FIELDS : list + List of fields with more information. 
+ """ + LESS_FIELDS = ["Identifier", "Slug"] MORE_FIELDS = LESS_FIELDS + [ "Title", @@ -283,7 +295,6 @@ def airtable(self): webbrowser.open("https://airtable.com/shrUcrUnd7jB9ChZV") # TODO Hardcoded def _get_catalog(self, columns: list, model_cards: list): - """Get the catalog of models""" R = [] columns = ["Index"] + columns @@ -348,7 +359,7 @@ def bentoml(self) -> CatalogTable: result = subprocess.run( ["bentoml", "list"], stdout=subprocess.PIPE, env=os.environ, timeout=10 ) - except Exception as e: + except Exception: shutil.rmtree(BENTOML_PATH) return None result = [r for r in result.stdout.decode("utf-8").split("\n") if r] diff --git a/ersilia/hub/content/columns_information.py b/ersilia/hub/content/columns_information.py index f9df6ea57..1e8abc79b 100644 --- a/ersilia/hub/content/columns_information.py +++ b/ersilia/hub/content/columns_information.py @@ -1,9 +1,9 @@ -import os import csv +import os import tempfile from urllib.request import urlopen -from ... import ErsiliaBase +from ... import ErsiliaBase ROOT = os.path.dirname(os.path.abspath(__file__)) @@ -13,6 +13,22 @@ class ColumnsInformation(ErsiliaBase): + """ + Class to handle the columns information of a model. + + This class provides methods to get columns information from local files or GitHub, + and validate the columns data. + + Parameters + ---------- + model_id : str + The model identifier. + api_name : str + The API name. + config_json : dict, optional + Configuration settings in JSON format. 
+ """ + def __init__(self, model_id, api_name, config_json=None): self.model_id = model_id self.api_name = api_name @@ -50,7 +66,12 @@ def _get_columns_information_from_file(self, file_name): else: directions += [r[2]] descriptions += [r[3]] - return {"name": names, "type": types, "direction": directions, "description": descriptions} + return { + "name": names, + "type": types, + "direction": directions, + "description": descriptions, + } else: self.logger.debug( "Explicit columns data for {0} API does not exist in file {1}".format( @@ -58,11 +79,11 @@ def _get_columns_information_from_file(self, file_name): ) ) return None - + def _get_columns_information_from_local(self): file_name = os.path.join(self._model_path(self.model_id), self.relative_path) return self._get_columns_information_from_file(file_name) - + def _get_columns_information_from_github(self): org = "ersilia-os" branch = "main" @@ -74,7 +95,7 @@ def _get_columns_information_from_github(self): try: with urlopen(url) as response: data = response.read() - with open(file_name, 'wb') as f: + with open(file_name, "wb") as f: f.write(data) except Exception as e: self.logger.debug( @@ -84,7 +105,7 @@ def _get_columns_information_from_github(self): ) self.logger.warning(f"Warning: {e}") return None - + def _validate_columns_data(self, data): for d in data["name"]: if d[0].lower() != d[0]: @@ -96,9 +117,7 @@ def _validate_columns_data(self, data): for d in data["type"]: if d not in self.DATA_TYPES: raise ValueError( - "Type {0} is not an accepted type: {1}".format( - d, self.DATA_TYPES - ) + "Type {0} is not an accepted type: {1}".format(d, self.DATA_TYPES) ) for d in data["direction"]: if d not in self.DESIRED_DIRECTIONS: @@ -116,6 +135,14 @@ def _validate_columns_data(self, data): ) def load(self): + """ + Load the columns information. + + Returns + ------- + dict + The columns information. 
+ """ data = self._get_columns_information_from_local() if data is None: data = self._get_columns_information_from_github() diff --git a/ersilia/hub/content/information.py b/ersilia/hub/content/information.py index f7eb0032d..7ea707650 100644 --- a/ersilia/hub/content/information.py +++ b/ersilia/hub/content/information.py @@ -1,5 +1,5 @@ -import os import json +import os try: import emoji @@ -7,23 +7,23 @@ emoji = None import click -from .columns_information import ColumnsInformation from ... import ErsiliaBase from ...default import ( - PACKMODE_FILE, API_SCHEMA_FILE, - MODEL_SIZE_FILE, - CARD_FILE, - SERVICE_CLASS_FILE, APIS_LIST_FILE, + CARD_FILE, + MODEL_SIZE_FILE, MODEL_SOURCE_FILE, + PACKMODE_FILE, + SERVICE_CLASS_FILE, ) from ...utils.paths import get_metadata_from_base_dir +from .columns_information import ColumnsInformation class Information(ErsiliaBase): """ - Class to handle the information of a model. + Class to handle the information of a model. This class provides methods to get various information about a model, such as pack mode, service class, model source, API schema, size, metadata, and card. @@ -125,7 +125,7 @@ def get(self) -> dict: Returns ------- dict - A dictionary containing various information about the model. + A dictionary containing various pieces of information about the model.
""" data = { "pack_mode": self._get_pack_mode(), diff --git a/ersilia/hub/content/search.py b/ersilia/hub/content/search.py index aef8ff283..f4c5daf23 100644 --- a/ersilia/hub/content/search.py +++ b/ersilia/hub/content/search.py @@ -1,9 +1,11 @@ """Search for models""" -from .catalog import CatalogTable -import numpy as np import re +import numpy as np + +from .catalog import CatalogTable + class ModelSearcher(object): """ @@ -100,7 +102,7 @@ def search_text(self, s: str) -> CatalogTable: string_ratio.append(ratio) ratio = self.levenshtein_ratio_and_distance(s, r[1]) string_ratio.append(ratio) - x = re.split("\s", r[2]) + x = re.split(r"\s", r[2]) for r1 in x: ratio = self.levenshtein_ratio_and_distance(s, r1) string_ratio.append(ratio) diff --git a/ersilia/hub/content/slug.py b/ersilia/hub/content/slug.py index dcb07c9e3..ce548d84f 100644 --- a/ersilia/hub/content/slug.py +++ b/ersilia/hub/content/slug.py @@ -1,6 +1,6 @@ +from ... import ErsiliaBase from ...db.hubdata.localslugs import SlugDb from ...utils.identifiers.model import ModelIdentifier -from ... import ErsiliaBase from .card import ModelCard diff --git a/ersilia/hub/delete/delete.py b/ersilia/hub/delete/delete.py index 519741259..a76a5557e 100644 --- a/ersilia/hub/delete/delete.py +++ b/ersilia/hub/delete/delete.py @@ -1,26 +1,26 @@ import os -import shutil import os.path +import shutil from typing import Tuple + from ... 
import ErsiliaBase -from ...utils.terminal import run_command -from ...utils.environment import Environment -from ...utils.conda import SimpleConda -from ...utils.system import is_inside_docker -from ..content.catalog import ModelCatalog -from ..content.card import ModelCard +from ...db.disk.fetched import FetchedModelsManager from ...db.environments.localdb import EnvironmentDb -from ...db.hubdata.localslugs import SlugDb from ...db.environments.managers import DockerManager -from ...db.disk.fetched import FetchedModelsManager -from ..bundle.status import ModelStatus - +from ...db.hubdata.localslugs import SlugDb from ...default import ISAURA_FILE_TAG, ISAURA_FILE_TAG_LOCAL +from ...utils.conda import SimpleConda +from ...utils.environment import Environment from ...utils.session import ( + deregister_model_session, get_model_session, remove_session_dir, - deregister_model_session, ) +from ...utils.system import is_inside_docker +from ...utils.terminal import run_command +from ..bundle.status import ModelStatus +from ..content.card import ModelCard +from ..content.catalog import ModelCatalog def rmtree(path): @@ -104,6 +104,18 @@ def __init__(self, config_json=None): self.path = self._lake_dir def delete_if_exists(self, path): + """ + Delete the file or symbolic link at the given path if it exists. + Parameters + ---------- + path : str + The path to the file or symbolic link to be deleted. + Notes + ----- + This function checks if the given path is a file or a symbolic link. + If it is, the file or symbolic link is removed. + """ + if os.path.isfile(path): os.remove(path) if os.path.islink(path): @@ -405,6 +417,13 @@ def __init__(self, config_json=None): ErsiliaBase.__init__(self, config_json=config_json, credentials_json=None) def pip_uninstall(self, model_id): + """ + Uninstalls a Python package using pip. + Parameters + ---------- + model_id : str + The name of the package to uninstall. 
+ """ run_command("echo y | pip uninstall %s" % model_id) def delete(self, model_id: str): diff --git a/ersilia/hub/fetch/__init__.py b/ersilia/hub/fetch/__init__.py index baee5e047..d071e5d45 100644 --- a/ersilia/hub/fetch/__init__.py +++ b/ersilia/hub/fetch/__init__.py @@ -1,6 +1,7 @@ import validators -from ...db.hubdata.interfaces import JsonModelsInterface + from ... import ErsiliaBase +from ...db.hubdata.interfaces import JsonModelsInterface MODEL_INSTALL_COMMANDS_FILE = "model_install_commands.sh" DOCKERFILE = "Dockerfile" @@ -14,6 +15,21 @@ class ModelURLResolver(ErsiliaBase): + """ + Class to resolve the URL of a model. + + This class provides methods to resolve the URL of a model based on its ID. + + Parameters + ---------- + model_id : str + The ID of the model. + config_json : dict, optional + Configuration settings in JSON format. + credentials_json : dict, optional + Credentials settings in JSON format. + """ + def __init__(self, model_id, config_json=None, credentials_json=None): super().__init__(config_json, credentials_json) self.model_id = model_id diff --git a/ersilia/hub/fetch/actions/__init__.py b/ersilia/hub/fetch/actions/__init__.py index 8c912faa8..ea43df78e 100644 --- a/ersilia/hub/fetch/actions/__init__.py +++ b/ersilia/hub/fetch/actions/__init__.py @@ -2,6 +2,21 @@ class BaseAction(ErsiliaBase): + """ + Base class for actions. + + This class provides common methods for actions. + + Parameters + ---------- + model_id : str + The ID of the model. + config_json : dict + Configuration settings in JSON format. + credentials_json : dict + Credentials settings in JSON format. 
+ """ + def __init__(self, model_id, config_json, credentials_json): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json diff --git a/ersilia/hub/fetch/actions/check.py b/ersilia/hub/fetch/actions/check.py index 32d6ddbed..5dc43726d 100644 --- a/ersilia/hub/fetch/actions/check.py +++ b/ersilia/hub/fetch/actions/check.py @@ -1,5 +1,5 @@ -from . import BaseAction from ....serve.autoservice import AutoService +from . import BaseAction class ModelChecker(BaseAction): @@ -20,5 +20,8 @@ def __init__(self, model_id, config_json): ) def check(self): + """ + Check that the autoservice works. + """ self.logger.debug("Checking that autoservice works") AutoService(self.model_id) diff --git a/ersilia/hub/fetch/actions/content.py b/ersilia/hub/fetch/actions/content.py index e3f58e24e..a23d6ddf3 100644 --- a/ersilia/hub/fetch/actions/content.py +++ b/ersilia/hub/fetch/actions/content.py @@ -1,9 +1,10 @@ -import os import json -from . import BaseAction +import os + from ....db.hubdata.localslugs import SlugDb -from ...content.card import ModelCard from ....default import CARD_FILE +from ...content.card import ModelCard +from . import BaseAction class CardGetter(BaseAction): @@ -26,6 +27,14 @@ def __init__(self, model_id, config_json): self.slugdb = SlugDb(config_json=config_json) def get(self): + """ + Get the model card. + + Returns + ------- + dict + The model card of the model. + """ self.logger.debug("Getting model card of {0}".format(self.model_id)) card = self.mc.get(self.model_id, as_json=False) slug = card["Slug"] diff --git a/ersilia/hub/fetch/actions/get.py b/ersilia/hub/fetch/actions/get.py index 8c1e2e33d..51fb50ac3 100644 --- a/ersilia/hub/fetch/actions/get.py +++ b/ersilia/hub/fetch/actions/get.py @@ -1,25 +1,20 @@ -import json import os import shutil -import tempfile import zipfile -import yaml - -from . import BaseAction from .... 
import ErsiliaBase +from ....default import PREDEFINED_EXAMPLE_FILES, S3_BUCKET_URL_ZIP from ....utils.download import GitHubDownloader, S3Downloader -from ...bundle.repo import PackFile, DockerfileFile -from ....utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception from ....utils.exceptions_utils.fetch_exceptions import ( FolderNotFoundError, S3DownloaderError, ) -from .template_resolver import TemplateResolver - -from ....default import S3_BUCKET_URL_ZIP, PREDEFINED_EXAMPLE_FILES -from ....utils.paths import get_metadata_from_base_dir +from ....utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception from ....utils.logging import make_temp_dir +from ....utils.paths import get_metadata_from_base_dir +from ...bundle.repo import DockerfileFile, PackFile +from . import BaseAction +from .template_resolver import TemplateResolver MODEL_DIR = "model" ROOT = os.path.basename(os.path.abspath(__file__)) diff --git a/ersilia/hub/fetch/actions/inform.py b/ersilia/hub/fetch/actions/inform.py index ca186fbcf..223a09c5b 100644 --- a/ersilia/hub/fetch/actions/inform.py +++ b/ersilia/hub/fetch/actions/inform.py @@ -1,12 +1,11 @@ -import os import json - -from ...content.information import Information -from ...bundle.repo import ServiceFile -from . import BaseAction +import os from ....default import INFORMATION_FILE from ....utils.paths import resolve_pack_method +from ...bundle.repo import ServiceFile +from ...content.information import Information +from . import BaseAction class ModelInformer(BaseAction): diff --git a/ersilia/hub/fetch/actions/modify.py b/ersilia/hub/fetch/actions/modify.py index c1ceaff62..42ad5413a 100644 --- a/ersilia/hub/fetch/actions/modify.py +++ b/ersilia/hub/fetch/actions/modify.py @@ -1,19 +1,19 @@ import os -import tempfile + import yaml from ersilia.default import PACKMODE_FILE -from . import BaseAction -from .. 
import ENVIRONMENT_YML, DOCKERFILE + from ....utils.conda import SimpleConda +from ....utils.logging import make_temp_dir from ....utils.terminal import run_command from ...bundle.bundle import ( - BundleEnvironmentFile, BundleDockerfileFile, + BundleEnvironmentFile, BundleRequirementsFile, ) -from ...bundle.repo import DockerfileFile -from ....utils.logging import make_temp_dir +from .. import DOCKERFILE, ENVIRONMENT_YML +from . import BaseAction class ModelModifier(BaseAction): diff --git a/ersilia/hub/fetch/actions/pack_bentoml.py b/ersilia/hub/fetch/actions/pack_bentoml.py index 88e1fc177..13301cf88 100644 --- a/ersilia/hub/fetch/actions/pack_bentoml.py +++ b/ersilia/hub/fetch/actions/pack_bentoml.py @@ -1,12 +1,12 @@ import os import sys +from ....default import PACKMODE_FILE +from ...bundle.repo import DockerfileFile, ServiceFile +from ..pack.bentoml_pack.mode import AVAILABLE_MODES, PackModeDecision +from ..pack.bentoml_pack.runners import get_runner from . import BaseAction from .modify import ModelModifier -from ..pack.bentoml_pack.mode import PackModeDecision, AVAILABLE_MODES -from ..pack.bentoml_pack.runners import get_runner -from ...bundle.repo import ServiceFile, DockerfileFile -from ....default import PACKMODE_FILE class ModelPacker(BaseAction): diff --git a/ersilia/hub/fetch/actions/pack_fastapi.py b/ersilia/hub/fetch/actions/pack_fastapi.py index b94959818..0ae9224ec 100644 --- a/ersilia/hub/fetch/actions/pack_fastapi.py +++ b/ersilia/hub/fetch/actions/pack_fastapi.py @@ -1,10 +1,9 @@ import os -import sys -from . import BaseAction -from ..pack.fastapi_pack.mode import PackModeDecision, AVAILABLE_MODES -from ..pack.fastapi_pack.runners import get_runner from ....default import PACKMODE_FILE +from ..pack.fastapi_pack.mode import AVAILABLE_MODES, PackModeDecision +from ..pack.fastapi_pack.runners import get_runner +from . 
import BaseAction class ModelPacker(BaseAction): diff --git a/ersilia/hub/fetch/actions/prepare.py b/ersilia/hub/fetch/actions/prepare.py index 1dc1a30d0..783a20f47 100644 --- a/ersilia/hub/fetch/actions/prepare.py +++ b/ersilia/hub/fetch/actions/prepare.py @@ -1,9 +1,8 @@ -from . import BaseAction +from .... import throw_ersilia_exception +from ....utils.exceptions_utils.delete_exceptions import ModelDeleteError from ...bundle.status import ModelStatus from ...delete.delete import ModelFullDeleter - -from ....utils.exceptions_utils.delete_exceptions import ModelDeleteError -from .... import throw_ersilia_exception +from . import BaseAction class ModelPreparer(BaseAction): diff --git a/ersilia/hub/fetch/actions/setup.py b/ersilia/hub/fetch/actions/setup.py index 92b3293a9..34fae95de 100644 --- a/ersilia/hub/fetch/actions/setup.py +++ b/ersilia/hub/fetch/actions/setup.py @@ -1,7 +1,7 @@ +from ....setup.requirements.conda import CondaRequirement from ....setup.requirements.eospath import EosHomePathRequirement -from ....setup.requirements.git import GitLfsRequirement, GithubCliRequirement +from ....setup.requirements.git import GithubCliRequirement, GitLfsRequirement from ....setup.requirements.ping import PingRequirement -from ....setup.requirements.conda import CondaRequirement from . import BaseAction diff --git a/ersilia/hub/fetch/actions/sniff_bentoml.py b/ersilia/hub/fetch/actions/sniff_bentoml.py index 1c47a5642..1d2e8f94c 100644 --- a/ersilia/hub/fetch/actions/sniff_bentoml.py +++ b/ersilia/hub/fetch/actions/sniff_bentoml.py @@ -1,27 +1,24 @@ -import os +import collections import csv import json -import collections +import os from pathlib import Path -from .... import throw_ersilia_exception - -from . import BaseAction -from .... import ErsiliaBase -from .... import ErsiliaModel -from ....io.input import ExampleGenerator -from ....io.pure import PureDataTyper -from ....io.annotated import AnnotatedDataTyper +from .... 
import ErsiliaBase, ErsiliaModel, throw_ersilia_exception from ....default import ( API_SCHEMA_FILE, MODEL_SIZE_FILE, PREDEFINED_EXAMPLE_FILES, ) +from ....io.annotated import AnnotatedDataTyper +from ....io.input import ExampleGenerator +from ....io.pure import PureDataTyper from ....utils.exceptions_utils.exceptions import EmptyOutputError from ....utils.exceptions_utils.fetch_exceptions import ( OutputDataTypesNotConsistentError, ) from ....utils.paths import get_metadata_from_base_dir +from . import BaseAction class BuiltinExampleReader(ErsiliaBase): diff --git a/ersilia/hub/fetch/actions/sniff_fastapi.py b/ersilia/hub/fetch/actions/sniff_fastapi.py index bd48d154e..bdce709bc 100644 --- a/ersilia/hub/fetch/actions/sniff_fastapi.py +++ b/ersilia/hub/fetch/actions/sniff_fastapi.py @@ -1,14 +1,11 @@ -import os import csv import json +import os from pathlib import Path -from .... import throw_ersilia_exception - -from . import BaseAction -from .... import ErsiliaBase +from .... import ErsiliaBase, throw_ersilia_exception from ....default import MODEL_SIZE_FILE - +from . import BaseAction N = 3 diff --git a/ersilia/hub/fetch/actions/template_resolver.py b/ersilia/hub/fetch/actions/template_resolver.py index 45fe5a56e..3c5553a8e 100644 --- a/ersilia/hub/fetch/actions/template_resolver.py +++ b/ersilia/hub/fetch/actions/template_resolver.py @@ -1,11 +1,10 @@ -import os import http.client +import os import urllib.parse +from ....default import ALLOWED_API_NAMES, GITHUB_ORG from . 
import BaseAction -from ....default import GITHUB_ORG, ALLOWED_API_NAMES - class TemplateResolver(BaseAction): """ @@ -51,7 +50,7 @@ def _check_file_in_github(self, file_path: str) -> bool: conn.request("HEAD", parsed_url.path) response = conn.getresponse() return response.status == 200 - except Exception as e: + except Exception: return False finally: conn.close() diff --git a/ersilia/hub/fetch/actions/toolize.py b/ersilia/hub/fetch/actions/toolize.py index dc2a2a121..bc49831db 100644 --- a/ersilia/hub/fetch/actions/toolize.py +++ b/ersilia/hub/fetch/actions/toolize.py @@ -1,9 +1,10 @@ import sys -from . import BaseAction -from ...bundle.status import ModelStatus -from ....utils.terminal import run_command + from ....db.environments.localdb import EnvironmentDb from ....setup.requirements.docker import DockerRequirement +from ....utils.terminal import run_command +from ...bundle.status import ModelStatus +from . import BaseAction class ModelToolizer(BaseAction): diff --git a/ersilia/hub/fetch/fetch.py b/ersilia/hub/fetch/fetch.py index c145ba212..3e750b546 100644 --- a/ersilia/hub/fetch/fetch.py +++ b/ersilia/hub/fetch/fetch.py @@ -1,26 +1,25 @@ -import os -import json import importlib +import json +import os from collections import namedtuple -from .lazy_fetchers.dockerhub import ModelDockerHubFetcher -from .lazy_fetchers.hosted import ModelHostedFetcher -from ...db.hubdata.interfaces import JsonModelsInterface from ... 
import ErsiliaBase -from ...hub.fetch.actions.template_resolver import TemplateResolver -from ...hub.fetch.actions.setup import SetupChecker +from ...db.hubdata.interfaces import JsonModelsInterface +from ...default import MODEL_SOURCE_FILE, PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI from ...hub.delete.delete import ModelFullDeleter +from ...hub.fetch.actions.template_resolver import TemplateResolver from ...setup.requirements import check_bentoml from ...utils.exceptions_utils.fetch_exceptions import ( - NotInstallableWithFastAPI, NotInstallableWithBentoML, + NotInstallableWithFastAPI, StandardModelExampleError, ) -from .register.standard_example import ModelStandardExample from ...utils.exceptions_utils.throw_ersilia_exception import throw_ersilia_exception from ...utils.terminal import yes_no_input -from ...default import PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI, EOS, MODEL_SOURCE_FILE -from . import STATUS_FILE, DONE_TAG +from . import STATUS_FILE +from .lazy_fetchers.dockerhub import ModelDockerHubFetcher +from .lazy_fetchers.hosted import ModelHostedFetcher +from .register.standard_example import ModelStandardExample FetchResult = namedtuple("FetchResult", ["fetch_success", "reason"]) @@ -222,7 +221,6 @@ def _fetch_from_hosted(self, model_id: str): self.logger.debug("Fetching from hosted done") def _decide_if_use_dockerhub(self, model_id: str) -> bool: - if self.repo_path is not None: return False if self.force_from_dockerhub: diff --git a/ersilia/hub/fetch/fetch_bentoml.py b/ersilia/hub/fetch/fetch_bentoml.py index 822a0338b..10c261769 100644 --- a/ersilia/hub/fetch/fetch_bentoml.py +++ b/ersilia/hub/fetch/fetch_bentoml.py @@ -2,26 +2,24 @@ import json import os -from timeit import default_timer as timer from datetime import timedelta +from timeit import default_timer as timer -from .actions.template_resolver import TemplateResolver -from .actions.setup import SetupChecker -from .actions.prepare import ModelPreparer +from ... import ErsiliaBase +from . 
import DONE_TAG, STATUS_FILE +from .actions.check import ModelChecker +from .actions.content import CardGetter from .actions.get import ModelGetter +from .actions.inform import ModelInformer from .actions.lake import LakeGetter from .actions.pack_bentoml import ModelPacker -from .actions.toolize import ModelToolizer -from .actions.content import CardGetter -from .actions.check import ModelChecker +from .actions.prepare import ModelPreparer +from .actions.setup import SetupChecker from .actions.sniff_bentoml import ModelSniffer -from .actions.inform import ModelInformer +from .actions.template_resolver import TemplateResolver +from .actions.toolize import ModelToolizer from .register.register import ModelRegisterer -from ... import ErsiliaBase - -from . import STATUS_FILE, DONE_TAG - class ModelFetcherFromBentoML(ErsiliaBase): """ @@ -52,7 +50,9 @@ class ModelFetcherFromBentoML(ErsiliaBase): -------- .. code-block:: python - fetcher = ModelFetcherFromBentoML(config_json=config) + fetcher = ModelFetcherFromBentoML( + config_json=config + ) fetcher.fetch(model_id="eosxxxx") """ @@ -205,7 +205,9 @@ def fetch(self, model_id: str): -------- .. code-block:: python - fetcher = ModelFetcherFromBentoML(config_json=config) + fetcher = ModelFetcherFromBentoML( + config_json=config + ) fetcher.fetch(model_id="eosxxxx") """ self._fetch(model_id) diff --git a/ersilia/hub/fetch/fetch_fastapi.py b/ersilia/hub/fetch/fetch_fastapi.py index 1a078d0dd..29817b266 100644 --- a/ersilia/hub/fetch/fetch_fastapi.py +++ b/ersilia/hub/fetch/fetch_fastapi.py @@ -1,26 +1,23 @@ """Fetch model from the Ersilia Model Hub using FastAPI.""" -import os import json - -from timeit import default_timer as timer +import os from datetime import timedelta +from timeit import default_timer as timer -from .actions.template_resolver import TemplateResolver -from .actions.setup import SetupChecker -from .actions.prepare import ModelPreparer +from ... import ErsiliaBase +from . 
import DONE_TAG, STATUS_FILE +from .actions.check import ModelChecker +from .actions.content import CardGetter from .actions.get import ModelGetter +from .actions.inform import ModelInformer from .actions.pack_fastapi import ModelPacker -from .actions.content import CardGetter -from .actions.check import ModelChecker +from .actions.prepare import ModelPreparer +from .actions.setup import SetupChecker from .actions.sniff_fastapi import ModelSniffer -from .actions.inform import ModelInformer +from .actions.template_resolver import TemplateResolver from .register.register import ModelRegisterer -from ... import ErsiliaBase - -from . import STATUS_FILE, DONE_TAG - class ModelFetcherFromFastAPI(ErsiliaBase): """ @@ -47,7 +44,9 @@ class ModelFetcherFromFastAPI(ErsiliaBase): -------- .. code-block:: python - fetcher = ModelFetcherFromFastAPI(config_json=config) + fetcher = ModelFetcherFromFastAPI( + config_json=config + ) fetcher.fetch(model_id="eosxxxx") """ @@ -177,7 +176,9 @@ def fetch(self, model_id: str): -------- .. 
code-block:: python - fetcher = ModelFetcherFromFastAPI(config_json=config) + fetcher = ModelFetcherFromFastAPI( + config_json=config + ) fetcher.fetch(model_id="eosxxxx") """ self.logger.debug("Fetching from FastAPI...") diff --git a/ersilia/hub/fetch/inner_template/pack.py b/ersilia/hub/fetch/inner_template/pack.py index 48cca2022..6d63e51f5 100644 --- a/ersilia/hub/fetch/inner_template/pack.py +++ b/ersilia/hub/fetch/inner_template/pack.py @@ -1,7 +1,9 @@ +# ruff: noqa + import os -from src.service import load_model -from src.service import Service -from src.service import CHECKPOINTS_BASEDIR, FRAMEWORK_BASEDIR + +from src.service import CHECKPOINTS_BASEDIR, FRAMEWORK_BASEDIR, Service, load_model + def main(): diff --git a/ersilia/hub/fetch/inner_template/src/service.py b/ersilia/hub/fetch/inner_template/src/service.py index 79484541c..9cdd2c586 100644 --- a/ersilia/hub/fetch/inner_template/src/service.py +++ b/ersilia/hub/fetch/inner_template/src/service.py @@ -1,16 +1,14 @@ -from typing import List, Dict, Any +import csv +import os +import pickle +import shutil +import subprocess +from typing import Any, Dict, List from bentoml import BentoService, api, artifacts from bentoml.adapters import JsonInput -from bentoml.types import JsonSerializable from bentoml.service import BentoServiceArtifact - -import pickle -import os -import shutil -import tempfile -import subprocess -import csv +from bentoml.types import JsonSerializable from .....utils.logging import make_temp_dir diff --git a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py index 8ccdbcde9..73839facf 100644 --- a/ersilia/hub/fetch/lazy_fetchers/dockerhub.py +++ b/ersilia/hub/fetch/lazy_fetchers/dockerhub.py @@ -1,27 +1,25 @@ -import os -import json import asyncio -from ..register.register import ModelRegisterer +import json +import os -from .... import ErsiliaBase, throw_ersilia_exception -from .... import EOS +from .... 
import EOS, ErsiliaBase, throw_ersilia_exception from ....default import ( - DOCKERHUB_ORG, + API_SCHEMA_FILE, DOCKERHUB_LATEST_TAG, - PREDEFINED_EXAMPLE_FILES, + DOCKERHUB_ORG, INFORMATION_FILE, - API_SCHEMA_FILE, + PREDEFINED_EXAMPLE_FILES, ) - -from ...pull.pull import ModelPuller from ....serve.services import PulledDockerImageService from ....setup.requirements.docker import DockerRequirement from ....utils.docker import ( + PACK_METHOD_BENTOML, SimpleDocker, resolve_pack_method_docker, - PACK_METHOD_BENTOML, ) +from ...pull.pull import ModelPuller from .. import STATUS_FILE +from ..register.register import ModelRegisterer class ModelDockerHubFetcher(ErsiliaBase): diff --git a/ersilia/hub/fetch/lazy_fetchers/hosted.py b/ersilia/hub/fetch/lazy_fetchers/hosted.py index bda822770..7b2dc1647 100644 --- a/ersilia/hub/fetch/lazy_fetchers/hosted.py +++ b/ersilia/hub/fetch/lazy_fetchers/hosted.py @@ -1,14 +1,14 @@ +import json import os + import requests -import json -from ..register.register import ModelRegisterer -from ....serve.services import HostedService -from .... import ErsiliaBase -from .... import EOS +from .... import EOS, ErsiliaBase from ....default import API_SCHEMA_FILE, INFORMATION_FILE, IS_FETCHED_FROM_HOSTED_FILE -from .. import STATUS_FILE +from ....serve.services import HostedService from ...fetch import ModelURLResolver +from .. import STATUS_FILE +from ..register.register import ModelRegisterer class ModelHostedFetcher(ErsiliaBase): diff --git a/ersilia/hub/fetch/pack/bentoml_pack/__init__.py b/ersilia/hub/fetch/pack/bentoml_pack/__init__.py index 0da838c26..e9b3558ef 100644 --- a/ersilia/hub/fetch/pack/bentoml_pack/__init__.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/__init__.py @@ -2,11 +2,10 @@ import shutil from ..... import ErsiliaBase +from .....default import BENTOML_PATH, H5_DATA_FILE, H5_EXTENSION, ISAURA_FILE_TAG from ....bundle.repo import DockerfileFile from ....delete.delete import ModelBentoDeleter - from ... 
import MODEL_INSTALL_COMMANDS_FILE -from .....default import BENTOML_PATH, H5_DATA_FILE, ISAURA_FILE_TAG, H5_EXTENSION class _Deleter(ErsiliaBase): @@ -60,7 +59,7 @@ def _dest_bundle_symlink(self): # model_install_commands model_install_commands_path = os.path.join(path, MODEL_INSTALL_COMMANDS_FILE) if not os.path.exists(model_install_commands_path): - with open(model_install_commands_path, "w") as f: + with open(model_install_commands_path, "w"): pass trg = os.path.join(bundle_dir, MODEL_INSTALL_COMMANDS_FILE) self.logger.debug("Creating model_install_commands.sh symlink dest <> bundle") @@ -143,6 +142,17 @@ def _write_model_install_commands(self): class BasePack(_Deleter, _Symlinker, _Writer): + """ + Base class for handling BentoML model packs. + + Parameters + ---------- + model_id : str + Identifier of the model. + config_json : dict + Configuration settings for the pack. + """ + def __init__(self, model_id, config_json): _Deleter.__init__(self, model_id, config_json) _Symlinker.__init__(self, model_id, config_json) diff --git a/ersilia/hub/fetch/pack/bentoml_pack/mode.py b/ersilia/hub/fetch/pack/bentoml_pack/mode.py index 531c13f6a..01cb14524 100644 --- a/ersilia/hub/fetch/pack/bentoml_pack/mode.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/mode.py @@ -1,13 +1,13 @@ -import os import json +import os from ..... 
import ErsiliaBase -from ....bundle.repo import DockerfileFile -from .....utils.versioning import Versioner -from .....utils.system import SystemChecker +from .....default import MODEL_CONFIG_FILENAME from .....setup.requirements.conda import CondaRequirement from .....setup.requirements.docker import DockerRequirement -from .....default import MODEL_CONFIG_FILENAME +from .....utils.system import SystemChecker +from .....utils.versioning import Versioner +from ....bundle.repo import DockerfileFile AVAILABLE_MODES = ["system", "venv", "conda", "docker"] diff --git a/ersilia/hub/fetch/pack/bentoml_pack/runners.py b/ersilia/hub/fetch/pack/bentoml_pack/runners.py index 68b43ae56..c3ab6f3f0 100644 --- a/ersilia/hub/fetch/pack/bentoml_pack/runners.py +++ b/ersilia/hub/fetch/pack/bentoml_pack/runners.py @@ -1,25 +1,23 @@ import os -import tempfile try: import bentoml except: bentoml = None -from . import BasePack -from .....utils.terminal import run_command +from ..... import throw_ersilia_exception from .....db.environments.localdb import EnvironmentDb from .....db.environments.managers import DockerManager -from .....utils.venv import SimpleVenv +from .....default import DEFAULT_VENV +from .....setup.baseconda import SetupBaseConda from .....utils.conda import SimpleConda from .....utils.docker import SimpleDocker -from .....setup.baseconda import SetupBaseConda - -from .....default import DEFAULT_VENV -from ... import MODEL_INSTALL_COMMANDS_FILE -from ..... import throw_ersilia_exception from .....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError from .....utils.logging import make_temp_dir +from .....utils.terminal import run_command +from .....utils.venv import SimpleVenv +from ... import MODEL_INSTALL_COMMANDS_FILE +from . import BasePack USE_CHECKSUM = False @@ -37,7 +35,9 @@ class SystemPack(BasePack): -------- .. 
code-block:: python - packer = SystemPack(model_id="eosxxxx", config_json=config) + packer = SystemPack( + model_id="eosxxxx", config_json=config + ) packer.run() """ diff --git a/ersilia/hub/fetch/pack/fastapi_pack/__init__.py b/ersilia/hub/fetch/pack/fastapi_pack/__init__.py index cef67b430..7ea4d22bf 100644 --- a/ersilia/hub/fetch/pack/fastapi_pack/__init__.py +++ b/ersilia/hub/fetch/pack/fastapi_pack/__init__.py @@ -1,8 +1,8 @@ import os import shutil -from .....default import H5_DATA_FILE, ISAURA_FILE_TAG, H5_EXTENSION from ..... import ErsiliaBase +from .....default import H5_DATA_FILE, H5_EXTENSION, ISAURA_FILE_TAG class _Symlinker(ErsiliaBase): @@ -44,5 +44,16 @@ def _symlinks(self): class BasePack(_Symlinker): + """ + Base class for handling FastAPI model packs. + + Parameters + ---------- + model_id : str + Identifier of the model. + config_json : dict, optional + Configuration settings for the pack. + """ + def __init__(self, model_id, config_json=None): _Symlinker.__init__(self, model_id, config_json) diff --git a/ersilia/hub/fetch/pack/fastapi_pack/mode.py b/ersilia/hub/fetch/pack/fastapi_pack/mode.py index c7d325227..57ddd1061 100644 --- a/ersilia/hub/fetch/pack/fastapi_pack/mode.py +++ b/ersilia/hub/fetch/pack/fastapi_pack/mode.py @@ -1,10 +1,9 @@ -import os import json +import os +from ..... import ErsiliaBase from .....default import MODEL_CONFIG_FILENAME from .....utils.system import SystemChecker -from ..... import ErsiliaBase - AVAILABLE_MODES = ["system", "conda"] @@ -24,7 +23,9 @@ class PackModeDecision(ErsiliaBase): -------- .. 
code-block:: python - pmd = PackModeDecision(model_id="model123", config_json={}) + pmd = PackModeDecision( + model_id="model123", config_json={} + ) mode = pmd.decide() """ diff --git a/ersilia/hub/fetch/pack/fastapi_pack/runners.py b/ersilia/hub/fetch/pack/fastapi_pack/runners.py index 2c83a1609..0a453f8dd 100644 --- a/ersilia/hub/fetch/pack/fastapi_pack/runners.py +++ b/ersilia/hub/fetch/pack/fastapi_pack/runners.py @@ -1,16 +1,14 @@ import os + import yaml -from . import BasePack -from .....utils.terminal import run_command +from ..... import EOS, throw_ersilia_exception from .....db.environments.localdb import EnvironmentDb from .....utils.conda import SimpleConda from .....utils.docker import SimpleDockerfileParser - -from ..... import throw_ersilia_exception from .....utils.exceptions_utils.fetch_exceptions import CondaEnvironmentExistsError - -from ..... import EOS +from .....utils.terminal import run_command +from . import BasePack class SystemPack(BasePack): @@ -28,7 +26,9 @@ class SystemPack(BasePack): -------- .. code-block:: python - packer = SystemPack(model_id="eosxxxx", config_json=config) + packer = SystemPack( + model_id="eosxxxx", config_json=config + ) packer.run() """ @@ -71,7 +71,9 @@ class CondaPack(BasePack): -------- .. code-block:: python - packer = CondaPack(model_id="eosxxxx", config_json=config) + packer = CondaPack( + model_id="eosxxxx", config_json=config + ) packer.run() """ @@ -173,7 +175,9 @@ def get_runner(pack_mode: str): .. 
code-block:: python runner_class = get_runner(pack_mode="system") - runner = runner_class(model_id="eosxxxx", config_json=config) + runner = runner_class( + model_id="eosxxxx", config_json=config + ) """ if pack_mode == "system": return SystemPack diff --git a/ersilia/hub/fetch/register/register.py b/ersilia/hub/fetch/register/register.py index d1887fddb..d6b0b4cf5 100644 --- a/ersilia/hub/fetch/register/register.py +++ b/ersilia/hub/fetch/register/register.py @@ -1,12 +1,11 @@ +import datetime +import json import os import shutil -import json -import datetime + import validators -from .... import ErsiliaBase -from .... import EOS -from .... import throw_ersilia_exception +from .... import EOS, ErsiliaBase, throw_ersilia_exception from ....default import ( IS_FETCHED_FROM_DOCKERHUB_FILE, IS_FETCHED_FROM_HOSTED_FILE, @@ -31,7 +30,9 @@ class ModelRegisterer(ErsiliaBase): -------- .. code-block:: python - registerer = ModelRegisterer(model_id="eosxxxx", config_json=config) + registerer = ModelRegisterer( + model_id="eosxxxx", config_json=config + ) await registerer.register(is_from_dockerhub=True) """ @@ -196,7 +197,9 @@ async def register( -------- .. code-block:: python - registerer = ModelRegisterer(model_id="eosxxxx", config_json=config) + registerer = ModelRegisterer( + model_id="eosxxxx", config_json=config + ) await registerer.register(is_from_dockerhub=True) """ if is_from_dockerhub and is_from_hosted: diff --git a/ersilia/hub/fetch/register/standard_example.py b/ersilia/hub/fetch/register/standard_example.py index 382da2122..c5a849834 100644 --- a/ersilia/hub/fetch/register/standard_example.py +++ b/ersilia/hub/fetch/register/standard_example.py @@ -1,16 +1,13 @@ import os -from ....utils.terminal import run_command_check_output, run_command -from ....utils.conda import SimpleConda -from ....utils.exceptions_utils.fetch_exceptions import StandardModelExampleError - +from .... 
import ErsiliaBase, throw_ersilia_exception from ....default import ( EXAMPLE_STANDARD_INPUT_CSV_FILENAME, EXAMPLE_STANDARD_OUTPUT_CSV_FILENAME, ) - -from .... import ErsiliaBase -from .... import throw_ersilia_exception +from ....utils.conda import SimpleConda +from ....utils.exceptions_utils.fetch_exceptions import StandardModelExampleError +from ....utils.terminal import run_command, run_command_check_output class ModelStandardExample(ErsiliaBase): @@ -28,7 +25,9 @@ class ModelStandardExample(ErsiliaBase): -------- .. code-block:: python - example_runner = ModelStandardExample(model_id="model123", config_json=config) + example_runner = ModelStandardExample( + model_id="model123", config_json=config + ) example_runner.run() """ @@ -76,6 +75,8 @@ def run(self): env_name = os.environ.get("CONDA_DEFAULT_ENV") self.logger.debug("The environment name is {0}".format(env_name)) SimpleConda().run_commandlines(env_name, commands) + + self.logger.info(f"Run log: {open(run_log).read()}") self._check_file_exists(output_csv=output_csv) self.logger.debug("Removing log file: {0}".format(run_log)) os.remove(run_log) diff --git a/ersilia/hub/pull/pull.py b/ersilia/hub/pull/pull.py index d275dff8e..38c326ea2 100644 --- a/ersilia/hub/pull/pull.py +++ b/ersilia/hub/pull/pull.py @@ -1,22 +1,19 @@ -import requests -import subprocess -import tempfile -import json +import asyncio import os import re -import asyncio -import aiofiles -from ... import ErsiliaBase -from ...utils.terminal import yes_no_input, run_command -from ... import throw_ersilia_exception +import subprocess + +import requests + +from ... 
import ErsiliaBase, throw_ersilia_exception +from ...default import DOCKERHUB_LATEST_TAG, DOCKERHUB_ORG +from ...utils.docker import SimpleDocker from ...utils.exceptions_utils.pull_exceptions import ( - DockerImageNotAvailableError, DockerConventionalPullError, + DockerImageNotAvailableError, ) - -from ...utils.docker import SimpleDocker -from ...default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG, EOS, MODEL_SIZE_FILE from ...utils.logging import make_temp_dir +from ...utils.terminal import run_command, yes_no_input PULL_IMAGE = os.environ.get("PULL_IMAGE", "Y") @@ -38,7 +35,9 @@ class ModelPuller(ErsiliaBase): -------- .. code-block:: python - puller = ModelPuller(model_id="eosxxxx", config_json=config) + puller = ModelPuller( + model_id="eosxxxx", config_json=config + ) await puller.async_pull() """ diff --git a/ersilia/io/dataframe.py b/ersilia/io/dataframe.py index 667b50c62..e3bae3dd2 100644 --- a/ersilia/io/dataframe.py +++ b/ersilia/io/dataframe.py @@ -1,6 +1,7 @@ -import numpy as np import csv +import numpy as np + class Dataframe(object): """ diff --git a/ersilia/io/input.py b/ersilia/io/input.py index b0d73bbec..cac95cba0 100644 --- a/ersilia/io/input.py +++ b/ersilia/io/input.py @@ -1,22 +1,17 @@ -import os -import shutil -import json import csv import importlib import itertools +import json +import os +import shutil +from .. import ErsiliaBase, throw_ersilia_exception +from ..default import PREDEFINED_EXAMPLE_FILES from ..hub.content.card import ModelCard -from .. import ErsiliaBase -from .. 
import throw_ersilia_exception - from ..utils.exceptions_utils.exceptions import NullModelIdentifierError - -from .shape import InputShape -from .shape import InputShapeSingle, InputShapeList, InputShapePairOfLists +from .readers.file import JsonFileReader, TabularFileReader from .readers.pyinput import PyInputReader -from .readers.file import TabularFileReader, JsonFileReader - -from ..default import PREDEFINED_EXAMPLE_FILES +from .shape import InputShape, InputShapeList, InputShapePairOfLists, InputShapeSingle class BaseIOGetter(ErsiliaBase): @@ -284,6 +279,21 @@ def adapt_one_by_one(self, inp): yield d def batch_iter(self, data, batch_size): + """ + Yield batches of data. + + Parameters + ---------- + data : iterable + The data to be batched. + batch_size : int + The size of each batch. + + Yields + ------ + iterable + A batch of data. + """ it = iter(data) while True: chunk = tuple(itertools.islice(it, batch_size)) diff --git a/ersilia/io/output.py b/ersilia/io/output.py index 6780ade78..d31406d7b 100644 --- a/ersilia/io/output.py +++ b/ersilia/io/output.py @@ -1,20 +1,21 @@ +import collections import csv -import os import json +import os import random + import numpy as np -import tempfile -import collections -from .dataframe import Dataframe -from .readers.file import FileTyper -from .pure import PureDataTyper -from ..serve.schema import ApiSchema + from .. 
import ErsiliaBase -from ..utils.hdf5 import Hdf5Data, Hdf5DataStacker from ..db.hubdata.interfaces import JsonModelsInterface from ..default import FEATURE_MERGE_PATTERN, PACK_METHOD_FASTAPI -from ..utils.paths import resolve_pack_method +from ..serve.schema import ApiSchema +from ..utils.hdf5 import Hdf5Data, Hdf5DataStacker from ..utils.logging import make_temp_dir +from ..utils.paths import resolve_pack_method +from .dataframe import Dataframe +from .pure import PureDataTyper +from .readers.file import FileTyper class DataFrame(object): @@ -206,8 +207,8 @@ def _has_meta(self, result: dict) -> bool: if self._expect_meta is not None: return self._expect_meta try: - r = result["result"] - m = result["meta"] + r = result["result"] # noqa: F841 + m = result["meta"] # noqa: F841 self._expect_meta = True except: self._expect_meta = False diff --git a/ersilia/io/output_logger.py b/ersilia/io/output_logger.py index 9e343782d..da7c436e0 100644 --- a/ersilia/io/output_logger.py +++ b/ersilia/io/output_logger.py @@ -1,5 +1,5 @@ -import os import csv +import os # TODO: For now, only explicitly tabular results are returned. We could, in principle, output any other result diff --git a/ersilia/io/pure.py b/ersilia/io/pure.py index fece02d4f..cbd4f7c0e 100644 --- a/ersilia/io/pure.py +++ b/ersilia/io/pure.py @@ -1,6 +1,5 @@ -import json -import os import numpy as np + from .. import ErsiliaBase from ..utils.paths import get_metadata_from_base_dir diff --git a/ersilia/io/readers/file.py b/ersilia/io/readers/file.py index 79d8333b0..46923efb6 100644 --- a/ersilia/io/readers/file.py +++ b/ersilia/io/readers/file.py @@ -1,14 +1,13 @@ -import os -import tempfile +import collections import csv import json -import collections +import os + import numpy as np -from ..shape import InputShape -from ..shape import InputShapeSingle, InputShapeList, InputShapePairOfLists from ... 
import logger from ...utils.logging import make_temp_dir +from ..shape import InputShape, InputShapeList, InputShapePairOfLists, InputShapeSingle MIN_COLUMN_VALIDITY = 0.8 FLATTENED_EVIDENCE = 0.2 @@ -271,12 +270,12 @@ def _get_delimiter_by_extension(self): def get_delimiter(self): """ - Get the column delimiter of the file. + Get the delimiter used in the file. Returns ------- str - The column delimiter. + The delimiter used in the file. """ delimiters = collections.defaultdict(int) default_extension = self._get_delimiter_by_extension() @@ -612,7 +611,12 @@ class TabularFileShapeStandardizer(BaseTabularFile): -------- .. code-block:: python - tfss = TabularFileShapeStandardizer("data.csv", "standard_data.csv", "single", IOHandler()) + tfss = TabularFileShapeStandardizer( + "data.csv", + "standard_data.csv", + "single", + IOHandler(), + ) tfss.standardize() """ @@ -748,6 +752,14 @@ def __init__(self, path): self._has_header = True def get_delimiter(self): + """ + Get the delimiter used in the file. + + Returns + ------- + str + The delimiter used in the file. + """ if self.path.endswith(".csv"): return "," if self.path.endswith(".tsv"): diff --git a/ersilia/io/readers/pyinput.py b/ersilia/io/readers/pyinput.py index c8586cfb4..9b33661a9 100644 --- a/ersilia/io/readers/pyinput.py +++ b/ersilia/io/readers/pyinput.py @@ -1,4 +1,4 @@ -from ..shape import InputShapeSingle, InputShapeList, InputShapePairOfLists +from ..shape import InputShapeList, InputShapePairOfLists, InputShapeSingle class PyInputReader(object): diff --git a/ersilia/io/types/compound.py b/ersilia/io/types/compound.py index e528ffd08..343115575 100644 --- a/ersilia/io/types/compound.py +++ b/ersilia/io/types/compound.py @@ -1,18 +1,17 @@ -import os import csv +import os import random -from ...utils.identifiers.arbitrary import ArbitraryIdentifier +from ... import logger from ...setup.requirements.compound import ( ChemblWebResourceClientRequirement, RdkitRequirement, ) -from ... 
import logger -from ..shape import InputShapeSingle, InputShapeList, InputShapePairOfLists -from .examples import compound as test_examples -from . import EXAMPLES_FOLDER +from ...utils.identifiers.arbitrary import ArbitraryIdentifier from ...utils.identifiers.compound import CompoundIdentifier - +from ..shape import InputShapeList, InputShapePairOfLists, InputShapeSingle +from . import EXAMPLES_FOLDER +from .examples import compound as test_examples EXAMPLES = "compound.tsv" diff --git a/ersilia/io/types/naive.py b/ersilia/io/types/naive.py index 66f73515b..dfc6a5c33 100644 --- a/ersilia/io/types/naive.py +++ b/ersilia/io/types/naive.py @@ -1,3 +1,6 @@ +# ruff: noqa: D101, D102 + + class IO(object): def __init__(self): pass diff --git a/ersilia/io/types/protein.py b/ersilia/io/types/protein.py index 55a06e3f6..eed09b583 100644 --- a/ersilia/io/types/protein.py +++ b/ersilia/io/types/protein.py @@ -1,10 +1,12 @@ -import random import os + from ...utils.identifiers.protein import ProteinIdentifier from . import EXAMPLES_FOLDER EXAMPLES = "protein.tsv" +# ruff: noqa: D101, D102 + class IO(object): def __init__(self): diff --git a/ersilia/io/types/text.py b/ersilia/io/types/text.py index 314018242..d767ac33f 100644 --- a/ersilia/io/types/text.py +++ b/ersilia/io/types/text.py @@ -1,14 +1,13 @@ -import os import csv -import random import importlib +import os +import random -from ...utils.identifiers.arbitrary import ArbitraryIdentifier from ... import logger -from ..shape import InputShapeSingle, InputShapeList, InputShapePairOfLists -from .examples import text as test_examples +from ...utils.identifiers.arbitrary import ArbitraryIdentifier +from ..shape import InputShapeList, InputShapePairOfLists, InputShapeSingle from . 
import EXAMPLES_FOLDER - +from .examples import text as test_examples EXAMPLES = "text.tsv" diff --git a/ersilia/lake/base.py b/ersilia/lake/base.py index 577638f66..2d9c4d3c8 100644 --- a/ersilia/lake/base.py +++ b/ersilia/lake/base.py @@ -1,3 +1,4 @@ +import importlib import os try: @@ -45,7 +46,7 @@ def is_installed(self) -> bool: If 'isaura' is not installed, a warning is logged. """ try: - import isaura + importlib.util.find_spec("isaura") return True except ModuleNotFoundError: diff --git a/ersilia/lake/interface.py b/ersilia/lake/interface.py index bd23b6ec6..c908c4a06 100644 --- a/ersilia/lake/interface.py +++ b/ersilia/lake/interface.py @@ -1,4 +1,5 @@ import json + import numpy as np try: @@ -6,9 +7,9 @@ except: Hdf5ApiExplorer = None -from .base import LakeBase from ..io.dataframe import Dataframe from ..io.output import DictlistDataframeConverter +from .base import LakeBase class IsauraInterface(LakeBase): diff --git a/ersilia/lake/manager.py b/ersilia/lake/manager.py index 01a40fe92..6ec88d2d8 100644 --- a/ersilia/lake/manager.py +++ b/ersilia/lake/manager.py @@ -5,9 +5,8 @@ except: Hdf5Explorer = None -from ..utils.terminal import run_command - from .. import ErsiliaBase +from ..utils.terminal import run_command class IsauraManager(ErsiliaBase): diff --git a/ersilia/lake/s3_logger.py b/ersilia/lake/s3_logger.py index f6f5ce615..b7e3aa1f1 100644 --- a/ersilia/lake/s3_logger.py +++ b/ersilia/lake/s3_logger.py @@ -1,7 +1,6 @@ import os -import boto3 -from .. import ErsiliaBase, EOS +from .. 
import EOS, ErsiliaBase from ..default import ERSILIA_RUNS_FOLDER diff --git a/ersilia/publish/deploy.py b/ersilia/publish/deploy.py index f97243b53..4454bbf79 100644 --- a/ersilia/publish/deploy.py +++ b/ersilia/publish/deploy.py @@ -1,10 +1,12 @@ -from ..core.base import ErsiliaBase -from ..app.app import AppBase, StreamlitApp -import subprocess import os import shutil +import subprocess + import streamlit +from ..app.app import AppBase, StreamlitApp +from ..core.base import ErsiliaBase + class DeployBase(ErsiliaBase): """ @@ -184,7 +186,10 @@ class Heroku(DeployBase): -------- .. code-block:: python - deployer = Heroku(config_json="path/to/config.json", credentials_json="path/to/credentials.json") + deployer = Heroku( + config_json="path/to/config.json", + credentials_json="path/to/credentials.json", + ) deployer.deploy("model_id") """ @@ -371,7 +376,11 @@ class Deployer(object): -------- .. code-block:: python - deployer = Deployer(cloud="heroku", config_json="path/to/config.json", credentials_json="path/to/credentials.json") + deployer = Deployer( + cloud="heroku", + config_json="path/to/config.json", + credentials_json="path/to/credentials.json", + ) deployer.deploy("model_id") """ diff --git a/ersilia/publish/dockerhub.py b/ersilia/publish/dockerhub.py index d3e1a1594..a8c97ca67 100644 --- a/ersilia/publish/dockerhub.py +++ b/ersilia/publish/dockerhub.py @@ -1,7 +1,7 @@ from .. import ErsiliaBase from ..db.environments.managers import DockerManager +from ..default import DOCKERHUB_LATEST_TAG, DOCKERHUB_ORG from ..utils.terminal import run_command -from ..default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG class DockerHubUploader(ErsiliaBase): @@ -19,8 +19,13 @@ class DockerHubUploader(ErsiliaBase): -------- .. 
code-block:: python - uploader = DockerHubUploader(model_id="model_id", config_json="path/to/config.json") - uploader.set_credentials(docker_user="username", docker_pwd="password") + uploader = DockerHubUploader( + model_id="model_id", + config_json="path/to/config.json", + ) + uploader.set_credentials( + docker_user="username", docker_pwd="password" + ) uploader.upload() """ diff --git a/ersilia/publish/inspect.py b/ersilia/publish/inspect.py index 1b8e7e002..9bb1f4024 100644 --- a/ersilia/publish/inspect.py +++ b/ersilia/publish/inspect.py @@ -1,22 +1,24 @@ -import requests -import subprocess import os +import subprocess import time -import yaml from collections import namedtuple -from ..hub.content.card import RepoMetadataFile -from ..hub.fetch.actions.template_resolver import TemplateResolver -from ..utils.logging import logger + +import requests +import yaml + from ..default import ( - INSTALL_YAML_FILE, DOCKERFILE_FILE, - PACK_METHOD_FASTAPI, - PACK_METHOD_BENTOML, + INSTALL_YAML_FILE, METADATA_JSON_FILE, METADATA_YAML_FILE, - RUN_FILE, + PACK_METHOD_BENTOML, + PACK_METHOD_FASTAPI, PREDEFINED_EXAMPLE_FILES, + RUN_FILE, ) +from ..hub.content.card import RepoMetadataFile +from ..hub.fetch.actions.template_resolver import TemplateResolver +from ..utils.logging import logger Result = namedtuple("Result", ["success", "details"]) @@ -44,7 +46,9 @@ class ModelInspector: -------- .. code-block:: python - inspector = ModelInspector(model="model_id", dir="path/to/repo") + inspector = ModelInspector( + model="model_id", dir="path/to/repo" + ) result = inspector.check_repo_exists() result = inspector.check_complete_metadata() """ @@ -364,6 +368,13 @@ def _validate_repo_structure(self, required_items): return missing_items def validate_repo_structure(self): + """ + Validate the repository structure. + + Returns + ------- + List of missing items. 
+ """ logger.debug(f"Pack Type: {self.pack_type}") if self.pack_type == PACK_METHOD_BENTOML: required_items = self.BENTOML_FILES diff --git a/ersilia/publish/lake.py b/ersilia/publish/lake.py index 51c5dd062..1ae6fc634 100644 --- a/ersilia/publish/lake.py +++ b/ersilia/publish/lake.py @@ -3,6 +3,19 @@ class LakeStorer(ErsiliaBase): + """ + Class to handle storing data in the lake. + + Parameters + ---------- + model_id : str + The ID of the model. + config_json : dict + Configuration in JSON format. + credentials_json : dict + Credentials in JSON format. + """ + def __init__(self, model_id, config_json, credentials_json): ErsiliaBase.__init__( self, config_json=config_json, credentials_json=credentials_json @@ -15,6 +28,9 @@ def __init__(self, model_id, config_json, credentials_json): ) def store(self): + """ + Store data in the lake. + """ self.logger.debug("Appeding local to public") self.isaura_manager.append_local_to_public() self.logger.debug("Pushing") diff --git a/ersilia/publish/publish.py b/ersilia/publish/publish.py index 9da654c42..5cb37199b 100644 --- a/ersilia/publish/publish.py +++ b/ersilia/publish/publish.py @@ -1,13 +1,12 @@ import os import shutil -from .rebase import TemplateRebaser -from . import EOS_TEMPLATE_REPOSITORY - -from ..utils.terminal import run_command from .. import ErsiliaBase -from ..utils.dvc import DVCSetup from ..default import GITHUB_ORG +from ..utils.dvc import DVCSetup +from ..utils.terminal import run_command +from . import EOS_TEMPLATE_REPOSITORY +from .rebase import TemplateRebaser class ModelPublisher(ErsiliaBase): @@ -117,7 +116,13 @@ def push(self): self.git_push() def test(self): + """ + Test the publishing process. + """ pass def docker(self): + """ + Handle Docker-related tasks. 
+ """ pass diff --git a/ersilia/publish/rebase.py b/ersilia/publish/rebase.py index bab86cae3..c8cff1a92 100644 --- a/ersilia/publish/rebase.py +++ b/ersilia/publish/rebase.py @@ -1,4 +1,6 @@ -import os, shutil +import os +import shutil + from .. import ErsiliaBase from ..default import GITHUB_ORG from ..utils.terminal import run_command diff --git a/ersilia/publish/s3.py b/ersilia/publish/s3.py index 708a4db6f..24c7ad46f 100644 --- a/ersilia/publish/s3.py +++ b/ersilia/publish/s3.py @@ -1,13 +1,13 @@ -import boto3 import os import shutil -import tempfile import zipfile -from ..utils.terminal import run_command -from ..utils.logging import make_temp_dir +import boto3 + from .. import ErsiliaBase from ..default import ERSILIA_MODELS_S3_BUCKET, ERSILIA_MODELS_ZIP_S3_BUCKET +from ..utils.logging import make_temp_dir +from ..utils.terminal import run_command AWS_ACCOUNT_REGION = "eu-central-1" @@ -27,8 +27,14 @@ class S3BucketRepoUploader(ErsiliaBase): -------- .. code-block:: python - uploader = S3BucketRepoUploader(model_id="model_id", config_json="path/to/config.json") - uploader.set_credentials(aws_access_key_id="access_key", aws_secret_access_key="secret_key") + uploader = S3BucketRepoUploader( + model_id="model_id", + config_json="path/to/config.json", + ) + uploader.set_credentials( + aws_access_key_id="access_key", + aws_secret_access_key="secret_key", + ) uploader.upload() """ diff --git a/ersilia/publish/store.py b/ersilia/publish/store.py index a8b373b09..88f30dc19 100644 --- a/ersilia/publish/store.py +++ b/ersilia/publish/store.py @@ -6,10 +6,11 @@ import os import shutil + from .. import ErsiliaBase -from ..utils.zip import Zipper -from ..utils.upload import OsfUploader from ..utils.remove import OsfRemover +from ..utils.upload import OsfUploader +from ..utils.zip import Zipper class ModelStorager(ErsiliaBase): @@ -93,7 +94,10 @@ class ModelRemover(ErsiliaBase): -------- .. 
code-block:: python - remover = ModelRemover(config_json="path/to/config.json", credentials_json="path/to/credentials.json") + remover = ModelRemover( + config_json="path/to/config.json", + credentials_json="path/to/credentials.json", + ) remover.remove(model_id="model_id") """ diff --git a/ersilia/publish/test.py b/ersilia/publish/test.py index 59c5acbe1..255c4b30b 100644 --- a/ersilia/publish/test.py +++ b/ersilia/publish/test.py @@ -1,50 +1,54 @@ -# TODO adapt to input-type agnostic. For now, it works only with Compound input types. +import csv import json import os -import csv import subprocess +import sys import tempfile import time -import click import types -import sys -from enum import Enum from dataclasses import dataclass -from typing import List from datetime import datetime -from pathlib import Path -from .inspect import ModelInspector -from ..utils.conda import SimpleConda -from .. import ErsiliaBase, throw_ersilia_exception -from ..io.input import ExampleGenerator -from ..utils.exceptions_utils import test_exceptions as texc -from ..utils.terminal import run_command_check_output -from ..hub.fetch.actions.template_resolver import TemplateResolver -from ..default import ( - INFORMATION_FILE, - INSTALL_YAML_FILE, - DOCKERFILE_FILE, - PACK_METHOD_FASTAPI, - PACK_METHOD_BENTOML, - METADATA_JSON_FILE, - METADATA_YAML_FILE, - RUN_FILE, - PREDEFINED_EXAMPLE_FILES, -) +from enum import Enum +from typing import List +# ruff: noqa MISSING_PACKAGES = False - try: - from scipy.stats import spearmanr from fuzzywuzzy import fuzz from rich.console import Console from rich.table import Table from rich.text import Text + from scipy.stats import spearmanr except ImportError: MISSING_PACKAGES = True +# ruff: enable +import click + +from .. 
import ErsiliaBase, throw_ersilia_exception +from ..default import ( + DOCKERFILE_FILE, + INFORMATION_FILE, + INSTALL_YAML_FILE, + METADATA_JSON_FILE, + METADATA_YAML_FILE, + PACK_METHOD_BENTOML, + PACK_METHOD_FASTAPI, + PREDEFINED_EXAMPLE_FILES, + RUN_FILE, +) +from ..hub.fetch.actions.template_resolver import TemplateResolver +from ..io.input import ExampleGenerator +from ..utils.conda import SimpleConda +from ..utils.exceptions_utils import test_exceptions as texc +from ..utils.terminal import run_command_check_output +from .inspect import ModelInspector class Options(Enum): + """ + Enum for different options. + """ + NUM_SAMPLES = 5 BASE = "base" OUTPUT_CSV = "result.csv" @@ -54,6 +58,10 @@ class Options(Enum): class TableType(Enum): + """ + Enum for different table types. + """ + MODEL_INFORMATION_CHECKS = "Model Information Checks" MODEL_FILE_CHECKS = "Model File Checks" MODEL_DIRECTORY_SIZES = "Model Directory Sizes" @@ -64,6 +72,10 @@ class TableType(Enum): @dataclass class TableConfig: + """ + Configuration for a table. + """ + title: str headers: List[str] @@ -92,6 +104,10 @@ class TableConfig: class STATUS_CONFIGS(Enum): + """ + Enum for status configurations. + """ + PASSED = ("PASSED", "green", "✔") FAILED = ("FAILED", "red", "✘") WARNING = ("WARNING", "yellow", "⚠") @@ -109,28 +125,31 @@ def __str__(self): # fmt: off class TestResult(Enum): + """ + Enum for test results. 
+ """ DATE_TIME_RUN = ( - "Date and Time Run", + "Date and Time Run", lambda: datetime.now().strftime("%Y-%m-%d %H:%M:%S") ) TIME_ELAPSED = ( - "Time to Run Tests (seconds)", + "Time to Run Tests (seconds)", lambda elapsed: elapsed ) BASIC_CHECKS = ( - "Basic Checks Passed", + "Basic Checks Passed", lambda svc: svc.information_check ) SINGLE_INPUT = ( - "Single Input Run Without Error", + "Single Input Run Without Error", lambda svc: svc.single_input ) EXAMPLE_INPUT = ( - "Example Input Run Without Error", + "Example Input Run Without Error", lambda svc: svc.example_input ) CONSISTENT_OUTPUT = ( - "Outputs Consistent", + "Outputs Consistent", lambda svc: svc.consistent_output ) BASH_RUN = ( @@ -144,6 +163,23 @@ def __init__(self, key, value_function): @classmethod def generate_results(cls, checkup_service, elapsed_time, run_using_bash): + """ + Generate test results. + + Parameters + ---------- + checkup_service : object + The checkup service. + elapsed_time : float + The elapsed time. + run_using_bash : bool + Whether to run using bash. + + Returns + ------- + dict + The generated results. + """ results = {} for test in cls: func_args = {} @@ -157,18 +193,38 @@ def generate_results(cls, checkup_service, elapsed_time, run_using_bash): value = test.value_function(**func_args) results[test.key] = value return results - + class CheckStrategy: + """ + Execuetd a strategy for checking inspect commands. + + Parameters + ---------- + check_function : callable + The function to check. + success_key : str + The key for success. + details_key : str + The key for details. + """ def __init__(self, check_function, success_key, details_key): self.check_function = check_function self.success_key = success_key self.details_key = details_key def execute(self): + """ + Execute the check strategy. + + Returns + ------- + dict + The results of the check. 
+ """ if self.check_function is None: return {} result = self.check_function() - if result is None: + if result is None: return {} return { self.success_key: result.success, @@ -222,15 +278,15 @@ def run(self) -> dict: """ if not self.model: raise ValueError("Model must be specified.") - + inspector = ModelInspector(self.model, self.dir) checks = self._get_checks(inspector) - + output = {} for strategy in checks: if strategy.check_function: output.update(strategy.execute()) - + return output def _get_checks(self, inspector: ModelInspector) -> list: @@ -338,7 +394,7 @@ def run_command(command: str, logger, capture_output: bool = False, shell: bool text=True, shell=shell ) - + stdout_lines, stderr_lines = [], [] for line in iter(process.stdout.readline, ''): @@ -401,7 +457,7 @@ def check_conda_env(self): raise Exception( f"Conda virtual environment not found for {self.model_id}" ) - + @staticmethod def get_conda_env_location(model_id: str, logger) -> str: """ @@ -432,7 +488,7 @@ def get_conda_env_location(model_id: str, logger) -> str: ) for line in result.splitlines(): if line.startswith("#") or not line.strip(): - continue + continue parts = line.split() if parts[0] == model_id: return parts[-1] @@ -440,7 +496,7 @@ def get_conda_env_location(model_id: str, logger) -> str: print(f"Error running conda command: {e.stderr}") except Exception as e: print(f"Unexpected error: {e}") - + return None class IOService: @@ -468,8 +524,8 @@ class IOService: -------- .. 
code-block:: python - ios = IOService(logger=logger, dest_dir="/path/to/dest", model_path="/path/to/model", - bundle_path="/path/to/bundle", bentoml_path="/path/to/bentoml", + ios = IOService(logger=logger, dest_dir="/path/to/dest", model_path="/path/to/model", + bundle_path="/path/to/bundle", bentoml_path="/path/to/bentoml", model_id="model_id", dir="/path/to/dir") ios.read_information() """ @@ -515,7 +571,7 @@ def _run_check(self, check_function, data, check_name: str, additional_info=None else: check_function(data) self.check_results.append(( - check_name, + check_name, str(STATUS_CONFIGS.PASSED) )) return True @@ -524,22 +580,22 @@ def _run_check(self, check_function, data, check_name: str, additional_info=None f"Check '{check_name}' failed: {e}" ) self.check_results.append(( - check_name, + check_name, str(STATUS_CONFIGS.FAILED) )) return False def _generate_table(self, title: str, headers: List[str], rows: List[List[str]], large_table: bool = False, merge: bool = False): - f_col_width = 30 if large_table else 30 - l_col_width = 50 if large_table else 10 - d_col_width = 30 if not large_table else 20 + f_col_width = 30 if large_table else 30 + l_col_width = 50 if large_table else 10 + d_col_width = 30 if not large_table else 20 table = Table( title=Text( title, - style="bold light_green" + style="bold light_green" ), - border_style="light_green", + border_style="light_green", show_lines=True, ) @@ -547,27 +603,27 @@ def _generate_table(self, title: str, headers: List[str], rows: List[List[str]], headers[0], justify="left", width=f_col_width, - style="bold" + style="bold" ) for header in headers[1:-1]: table.add_column( header, justify="center", width=d_col_width, - style="bold" + style="bold" ) table.add_column( headers[-1], justify="right", width=l_col_width, - style="bold" + style="bold" ) - prev_value = None + prev_value = None for row in rows: first_col = str(row[0]) if merge and first_col == prev_value: - first_col = "" + first_col = "" else: prev_value = 
first_col @@ -598,7 +654,7 @@ def get_model_type(model_id: str, repo_path: str) -> str: The type of the model (e.g., PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI). """ resolver = TemplateResolver( - model_id=model_id, + model_id=model_id, repo_path=repo_path ) if resolver.is_bentoml(): @@ -623,7 +679,7 @@ def get_file_requirements(self) -> List[str]: If the model type is unsupported. """ type = IOService.get_model_type( - model_id=self.model_id, + model_id=self.model_id, repo_path=self.dir ) if type == PACK_METHOD_BENTOML: @@ -650,8 +706,8 @@ def read_information(self) -> dict: If the information file does not exist. """ file = os.path.join( - self._dest_dir, - self.model_id, + self._dest_dir, + self.model_id, INFORMATION_FILE ) if not os.path.exists(file): @@ -701,7 +757,7 @@ def get_conda_env_size(self) -> int: """ try: loc = SetupService.get_conda_env_location( - self.model_id, + self.model_id, self.logger ) return self.calculate_directory_size(loc) @@ -712,9 +768,22 @@ def get_conda_env_size(self) -> int: return 0 def calculate_directory_size(self, path: str) -> int: + """ + Calculate the size of a directory. + + Parameters + ---------- + path : str + The path to the directory. + + Returns + ------- + int + The size of the directory. + """ try: size_output = SetupService.run_command( - ["du", "-sm", path], + ["du", "-sm", path], logger=self.logger, capture_output=True, shell=False @@ -726,7 +795,7 @@ def calculate_directory_size(self, path: str) -> int: f"Error calculating directory size for {path}: {e}" ) return 0 - + @throw_ersilia_exception() def get_directories_sizes(self) -> tuple: """ @@ -762,7 +831,7 @@ class CheckService: -------- .. 
code-block:: python - check_service = CheckService(logger=logger, model_id="model_id", dest_dir="/path/to/dest", + check_service = CheckService(logger=logger, model_id="model_id", dest_dir="/path/to/dest", dir="/path/to/dir", ios=ios) check_service.check_files() """ @@ -793,18 +862,18 @@ class CheckService: } INPUT_SHAPE = { - "Single", - "Pair", - "List", - "Pair of Lists", + "Single", + "Pair", + "List", + "Pair of Lists", "List of Lists" } OUTPUT_SHAPE = { - "Single", - "List", - "Flexible List", - "Matrix", + "Single", + "List", + "Flexible List", + "Matrix", "Serializable Object" } @@ -838,9 +907,9 @@ def check_files(self): for file in requirements: self.logger.debug(f"Checking file: {file}") self._run_check( - self._check_file_existence, + self._check_file_existence, None, - f"File: {file}", + f"File: {file}", file ) @@ -866,27 +935,27 @@ def _check_model_task(self, data): raw_tasks = data.get("card", {}).get("Task", "") if isinstance(raw_tasks, str): tasks = [ - task.strip() - for task - in raw_tasks.split(",") + task.strip() + for task + in raw_tasks.split(",") if task.strip() ] elif isinstance(raw_tasks, list): tasks = [ - task.strip() - for task - in raw_tasks + task.strip() + for task + in raw_tasks if isinstance(task, str) and task.strip() ] else: raise texc.InvalidEntry( - "Task", + "Task", message="Task field must be a string or list." ) if not tasks: raise texc.InvalidEntry( - "Task", + "Task", message="Task field is missing or empty." 
) @@ -903,39 +972,39 @@ def _check_model_output(self, data): raw_outputs = data.get("card", {}).get("Output", "") or data.get("metadata", {}).get("Output", "") if isinstance(raw_outputs, str): outputs = [ - output.strip() - for output + output.strip() + for output in raw_outputs.split(",") if output.strip() ] elif isinstance(raw_outputs, list): outputs = [ - output.strip() - for output - in raw_outputs + output.strip() + for output + in raw_outputs if isinstance(output, str) and output.strip() ] else: raise texc.InvalidEntry( - "Output", + "Output", message="Output field must be a string or list." ) if not outputs: raise texc.InvalidEntry( - "Output", + "Output", message="Output field is missing or empty." ) invalid_outputs = [ - output - for output - in outputs + output + for output + in outputs if output not in self.MODEL_OUTPUT ] if invalid_outputs: raise texc.InvalidEntry( - "Output", + "Output", message=f"Invalid outputs: {' '.join(invalid_outputs)}" ) @@ -944,44 +1013,44 @@ def _check_model_output(self, data): def _check_model_input(self, data): self.logger.debug("Checking model input") valid_inputs = [{"Compound"}, {"Protein"}, {"Text"}] - + model_input = data.get("card", {}).get("Input") or data.get("metadata", {}).get("Input") - + if not model_input or set(model_input) not in valid_inputs: raise texc.InvalidEntry("Input") def _check_model_input_shape(self, data): self.logger.debug("Checking model input shape") model_input_shape = ( - data.get("card", {}).get("Input Shape") or + data.get("card", {}).get("Input Shape") or data.get("metadata", {}).get("InputShape") ) - + if model_input_shape not in self.INPUT_SHAPE: raise texc.InvalidEntry("Input Shape") def _check_model_output_type(self, data): self.logger.debug("Checking model output type...") valid_output_types = [{"String"}, {"Float"}, {"Integer"}] - + model_output_type = ( - data.get("card", {}).get("Output Type") or + data.get("card", {}).get("Output Type") or data.get("metadata", {}).get("OutputType") 
) - + if not model_output_type or set(model_output_type) not in valid_output_types: raise texc.InvalidEntry("Output Type") def _check_model_output_shape(self, data): self.logger.debug("Checking model output shape...") model_output_shape = ( - data.get("card", {}).get("Output Shape") or + data.get("card", {}).get("Output Shape") or data.get("metadata", {}).get("OutputShape") ) - + if model_output_shape not in self.OUTPUT_SHAPE: raise texc.InvalidEntry("Output Shape") - + @throw_ersilia_exception() def check_information(self, output): """ @@ -994,8 +1063,8 @@ def check_information(self, output): """ self.logger.debug(f"Beginning checks for {self.model_id} model information") file = os.path.join( - self._dest_dir, - self.model_id, + self._dest_dir, + self.model_id, INFORMATION_FILE ) with open(file, "r") as f: @@ -1017,7 +1086,7 @@ def check_information(self, output): @throw_ersilia_exception() def check_single_input(self, output, run_model, run_example): """ - Check if the model can run with a single input to check if it has a value + Check if the model can run with a single input to check if it has a value in the produced output csv. Parameters @@ -1030,14 +1099,14 @@ def check_single_input(self, output, run_model, run_example): Function to generate example input. """ input = run_example( - n_samples=Options.NUM_SAMPLES.value, - file_name=None, - simple=True, + n_samples=Options.NUM_SAMPLES.value, + file_name=None, + simple=True, try_predefined=False ) result = run_model( - input=input, - output=output, + input=input, + output=output, batch=100 ) @@ -1072,17 +1141,17 @@ def check_example_input(self, output, run_model, run_example): Function to generate example input. 
""" input_samples = run_example( - n_samples=Options.NUM_SAMPLES.value, - file_name=None, - simple=True, + n_samples=Options.NUM_SAMPLES.value, + file_name=None, + simple=True, try_predefined=False ) self.logger.debug("Testing model on input of 5 smiles given by 'example' command") result = run_model( - input=input_samples, - output=output, + input=input_samples, + output=output, batch=100 ) @@ -1140,7 +1209,7 @@ def validate_output(output1, output2): elif isinstance(output1, str): if _compare_output_strings(output1, output2) <= 95: raise texc.InconsistentOutputs(self.model_id) - + def read_csv(file_path): absolute_path = os.path.abspath(file_path) if not os.path.exists(absolute_path): @@ -1151,23 +1220,23 @@ def read_csv(file_path): output1_path = os.path.abspath(Options.OUTPUT1_CSV.value) output2_path = os.path.abspath(Options.OUTPUT2_CSV.value) - + self.logger.debug("Confirming model produces consistent output...") input_samples = run_example( - n_samples=Options.NUM_SAMPLES.value, - file_name=None, - simple=True, + n_samples=Options.NUM_SAMPLES.value, + file_name=None, + simple=True, try_predefined=False ) run_model( - input=input_samples, - output=output1_path, + input=input_samples, + output=output1_path, batch=100 ) run_model( - input=input_samples, - output=output2_path, + input=input_samples, + output=output2_path, batch=100 ) @@ -1216,13 +1285,13 @@ class RunnerService: """ def __init__( - self, - model_id: str, - logger, - ios_service: IOService, - checkup_service: CheckService, + self, + model_id: str, + logger, + ios_service: IOService, + checkup_service: CheckService, setup_service: SetupService, - model_path: str, + model_path: str, level: str, dir: str, remote: bool, @@ -1280,19 +1349,19 @@ def fetch(self): Fetch the model repository from the specified directory. 
""" SetupService.run_command( - " ".join(["ersilia", - "-v", - "fetch", self.model_id, + " ".join(["ersilia", + "-v", + "fetch", self.model_id, "--from_dir", self.dir ]), logger=self.logger, ) def run_exampe( - self, - n_samples: int, - file_name: str = None, - simple: bool = True, + self, + n_samples: int, + file_name: str = None, + simple: bool = True, try_predefined: bool = False ): """ @@ -1315,9 +1384,9 @@ def run_exampe( List of generated input samples. """ return self.example.example( - n_samples=n_samples, - file_name=file_name, - simple=simple, + n_samples=n_samples, + file_name=file_name, + simple=simple, try_predefined=try_predefined ) @throw_ersilia_exception() @@ -1332,7 +1401,7 @@ def run_bash(self): """ def compute_rmse(y_true, y_pred): return sum((yt - yp) ** 2 for yt, yp in zip(y_true, y_pred)) ** 0.5 / len(y_true) - + def compare_outputs(bsh_data, ers_data): columns = set(bsh_data[0].keys()) & set(data[0].keys()) self.logger.debug(f"Common columns: {columns}") @@ -1349,12 +1418,12 @@ def compare_outputs(bsh_data, ers_data): raise texc.InconsistentOutputs(self.model_id) elif all(isinstance(val, str) for val in bv + ev): if not all( - self._compare_string_similarity(a, b, 95) - for a, b + self._compare_string_similarity(a, b, 95) + for a, b in zip(bv, ev) ): raise texc.InconsistentOutputs(self.model_id) - + def read_csv(path, flag=False): try: with open(path, "r") as file: @@ -1382,7 +1451,7 @@ def infer_type(value): try: return float(value) except ValueError: - return value + return value _values = [infer_type(x) for x in values] @@ -1398,10 +1467,10 @@ def infer_type(value): def run_subprocess(command, env_vars=None): try: result = subprocess.run( - command, - capture_output=True, - text=True, - check=True, + command, + capture_output=True, + text=True, + check=True, env=env_vars, ) self.logger.debug( @@ -1423,9 +1492,9 @@ def run_subprocess(command, env_vars=None): error_log_path = os.path.join(temp_dir, "error.txt") input = self.run_exampe( - 
n_samples=Options.NUM_SAMPLES.value, + n_samples=Options.NUM_SAMPLES.value, file_name=None, - simple=True, + simple=True, try_predefined=False ) @@ -1435,9 +1504,9 @@ def run_subprocess(command, env_vars=None): f.write("smiles\n" + "\n".join(map(str, input))) run_sh_path = os.path.join( - model_path, - "model", - "framework", + model_path, + "model", + "framework", RUN_FILE ) if not os.path.exists(run_sh_path): @@ -1462,18 +1531,18 @@ def run_subprocess(command, env_vars=None): bsh_data = read_csv(bash_output_path) self.logger.info(f"Bash Data:{bsh_data}") - self.logger.debug(f"Serving the model after run.sh") + self.logger.debug("Serving the model after run.sh") run_subprocess( - ["ersilia", "-v", - "serve", self.model_id, + ["ersilia", "-v", + "serve", self.model_id, ] ) self.logger.debug( - f"Running model for bash data consistency checking" + "Running model for bash data consistency checking" ) - out = run_subprocess( - ["ersilia", "-v", - "run", + run_subprocess( + ["ersilia", "-v", + "run", "-i", ex_file, "-o", output_path ] @@ -1486,13 +1555,33 @@ def run_subprocess(command, env_vars=None): @staticmethod def default_env(): + """ + Get the default environment. + + Returns + ------- + str + The default environment. + """ if "CONDA_DEFAULT_ENV" in os.environ: return os.environ["CONDA_DEFAULT_ENV"] - else: - return Options.BASE.value + return None @staticmethod def conda_prefix(is_base): + """ + Get the conda prefix. + + Parameters + ---------- + is_base : bool + Whether it is the base environment. + + Returns + ------- + str + The conda prefix. + """ o = run_command_check_output("which conda").rstrip() if o: o = os.path.abspath(os.path.join(o, "..", "..")) @@ -1505,17 +1594,22 @@ def conda_prefix(is_base): return o def is_base(self): + """ + Check if the current environment is the base environment. + + Returns + ------- + bool + True if it is the base environment, False otherwise. 
+ """ default_env = self.default_env() self.logger.debug(f"Default environment: {default_env}") - if default_env == "base": - return True - else: - return False + return default_env == "base" def _compare_string_similarity( - self, + self, str1, - str2, + str2, threshold ): similarity = fuzz.ratio(str1, str2) @@ -1533,7 +1627,7 @@ def make_output(self, elapsed_time: float): """ results = TestResult.generate_results( self.checkup_service, - elapsed_time, + elapsed_time, self.run_using_bash ) data = [(key, str(value)) for key, value in results.items()] @@ -1558,10 +1652,10 @@ def run(self, output_file: str = Options.OUTPUT_CSV.value): """ if not output_file: output_file = os.path.join( - self._model_path(self.model_id), + self._model_path(self.model_id), Options.OUTPUT_CSV.value ) - + start_time = time.time() try: @@ -1570,7 +1664,7 @@ def run(self, output_file: str = Options.OUTPUT_CSV.value): self._perform_inspect() if self.level == Options.LEVEL_DEEP.value: self._perform_deep_checks(output_file) - + elapsed_time = time.time() - start_time self.make_output(elapsed_time) self._clear_folders() @@ -1603,12 +1697,12 @@ def transform_key(self, value): elif value is False: return str(STATUS_CONFIGS.FAILED) return value - + def _perform_inspect(self): if self.inspect: out = self.inspecter.run() out = { - " ".join(word.capitalize() + " ".join(word.capitalize() for word in k.split("_")): self.transform_key(v) for k, v in out.items() } @@ -1621,7 +1715,7 @@ def _perform_inspect(self): large_table=True, merge=True ) - + def _perform_checks(self, output_file): self.checkup_service.check_information(output_file) self._generate_table( @@ -1645,8 +1739,8 @@ def _log_directory_sizes(self): def _perform_deep_checks(self, output_file): self.checkup_service.check_single_input( - output_file, - self.run_model, + output_file, + self.run_model, self.run_exampe ) self._generate_table( @@ -1658,12 +1752,12 @@ def _perform_deep_checks(self, output_file): ] ) 
self.checkup_service.check_example_input( - output_file, - self.run_model, + output_file, + self.run_model, self.run_exampe ) self.checkup_service.check_consistent_output( - self.run_exampe, + self.run_exampe, self.run_model ) self.run_bash() @@ -1682,18 +1776,35 @@ def _clear_folders(self): ) class ModelTester(ErsiliaBase): + """ + Class to handle model testing. Initializes the model tester services and runs the tests. + Parameters + ---------- + model_id : str + The ID of the model. + level : str + The level of testing. + dir : str + The directory for the model. + inspect : bool + Whether to inspect the model. + remote : bool + Whether to fetch the model from a remote source. + remove : bool + Whether to remove the model after testing. + """ def __init__( - self, - model_id, - level, + self, + model_id, + level, dir, inspect, remote, remove ): ErsiliaBase.__init__( - self, - config_json=None, + self, + config_json=None, credentials_json=None ) self.model_id = model_id @@ -1704,23 +1815,23 @@ def __init__( self.remove = remove self._check_pedendency() self.setup_service = SetupService( - self.model_id, - self.dir, + self.model_id, + self.dir, self.logger, self.remote ) self.ios = IOService( - self.logger, + self.logger, self._dest_dir, - self._model_path, - self._get_bundle_location, - self._get_bentoml_location, + self._model_path, + self._get_bundle_location, + self._get_bentoml_location, self.model_id, self.dir ) self.checks = CheckService( - self.logger, - self.model_id, + self.logger, + self.model_id, self._dest_dir, self.dir, self.ios, @@ -1732,9 +1843,9 @@ def __init__( ) self.runner = RunnerService( self.model_id, - self.logger, - self.ios, - self.checks, + self.logger, + self.ios, + self.checks, self.setup_service, self._model_path, self.level, @@ -1752,6 +1863,9 @@ def _check_pedendency(self): ) def setup(self): + """ + Set up the model tester. 
+ """ self.logger.debug(f"Running conda setup for {self.model_id}") self.setup_service.fetch_repo() # for remote option self.logger.debug(f"Fetching model {self.model_id} from local dir: {self.dir}") @@ -1759,4 +1873,12 @@ def setup(self): self.setup_service.check_conda_env() def run(self, output_file=None): + """ + Run the model tester. + + Parameters + ---------- + output_file : str, optional + The output file. + """ self.runner.run(output_file) diff --git a/ersilia/serve/api.py b/ersilia/serve/api.py index 88e9338ca..da263192f 100644 --- a/ersilia/serve/api.py +++ b/ersilia/serve/api.py @@ -1,20 +1,18 @@ -import os +import collections import csv -import requests import json -import collections -import tempfile +import os import time +import requests + +from .. import ErsiliaBase, logger from ..io.input import GenericInputAdapter from ..io.output import GenericOutputAdapter from ..lake.interface import IsauraInterface -from .. import logger -from .. import ErsiliaBase -from .schema import ApiSchema - from ..utils.exceptions_utils.api_exceptions import InputFileNotFoundError from ..utils.logging import make_temp_dir +from .schema import ApiSchema class Api(object): @@ -38,8 +36,18 @@ class Api(object): -------- .. code-block:: python - api = Api(model_id='eosxxxx', url='http://0.0.0.0:25512/', api_name='run', save_to_lake=True, config_json={}) - result = api.post(input='input.json', output='output.csv', batch_size=10) + api = Api( + model_id="eosxxxx", + url="http://0.0.0.0:25512/", + api_name="run", + save_to_lake=True, + config_json={}, + ) + result = api.post( + input="input.json", + output="output.csv", + batch_size=10, + ) """ def __init__(self, model_id, url, api_name, save_to_lake, config_json): @@ -198,6 +206,23 @@ def meta(self): return self.output_adapter.meta() def post_only_calculations(self, input, output, batch_size): + """ + Post input data to the API and get the result, performing only calculations. 
+ + Parameters + ---------- + input : str + The input data file or data. + output : str + The output data file. + batch_size : int + The batch size for processing. + + Yields + ------ + dict + The result of the API call. + """ self._batch_size = batch_size if output is not None: tmp_folder = make_temp_dir(prefix="ersilia-") @@ -222,6 +247,23 @@ def post_only_calculations(self, input, output, batch_size): yield r def post_only_reads(self, input, output, batch_size): + """ + Post input data to the API and get the result, performing only reads. + + Parameters + ---------- + input : str + The input data file or data. + output : str + The output data file. + batch_size : int + The batch size for processing. + + Yields + ------ + dict + The result of the API call. + """ self._batch_size = batch_size if output is not None: tmp_folder = make_temp_dir(prefix="ersilia-") @@ -246,6 +288,23 @@ def post_only_reads(self, input, output, batch_size): yield r def post_amenable_to_h5(self, input, output, batch_size): + """ + Post input data to the API and get the result, handling HDF5 serialization. + + Parameters + ---------- + input : str + The input data file or data. + output : str + The output data file. + batch_size : int + The batch size for processing. + + Yields + ------ + dict + The result of the API call. + """ self.logger.debug( "Checking for already available calculations in the data lake" ) @@ -305,6 +364,23 @@ def post_amenable_to_h5(self, input, output, batch_size): yield result def post_unique_input(self, input, output, batch_size): + """ + Post unique input data to the API and get the result. + + Parameters + ---------- + input : str + The input data file or data. + output : str + The output data file. + batch_size : int + The batch size for processing. + + Yields + ------ + dict + The result of the API call. 
+ """ schema = ApiSchema(model_id=self.model_id, config_json=self.config_json) if ( not schema.isfile() diff --git a/ersilia/serve/autoservice.py b/ersilia/serve/autoservice.py index b4ea8aa04..2470faeb0 100644 --- a/ersilia/serve/autoservice.py +++ b/ersilia/serve/autoservice.py @@ -1,28 +1,27 @@ +import json import os -import tempfile import shutil -import json +import tempfile -from .services import ( - SystemBundleService, - VenvEnvironmentService, - CondaEnvironmentService, - DockerImageService, - DummyService, - PulledDockerImageService, - HostedService, -) -from .api import Api -from ..db.environments.managers import DockerManager from .. import ErsiliaBase -from ..utils import tmp_pid_file - +from ..db.environments.managers import DockerManager from ..default import ( - DEFAULT_BATCH_SIZE, - SERVICE_CLASS_FILE, APIS_LIST_FILE, + DEFAULT_BATCH_SIZE, IS_FETCHED_FROM_DOCKERHUB_FILE, IS_FETCHED_FROM_HOSTED_FILE, + SERVICE_CLASS_FILE, +) +from ..utils import tmp_pid_file +from .api import Api +from .services import ( + CondaEnvironmentService, + DockerImageService, + DummyService, + HostedService, + PulledDockerImageService, + SystemBundleService, + VenvEnvironmentService, ) DEFAULT_OUTPUT = None @@ -56,7 +55,9 @@ class AutoService(ErsiliaBase): -------- .. code-block:: python - service = AutoService(model_id='model123', config_json={}) + service = AutoService( + model_id="model123", config_json={} + ) service.serve() """ @@ -355,6 +356,9 @@ def _kill_pids(self, pids): self.logger.info("PID {0} is unassigned".format(pid)) def clean_before_serving(self): + """ + Clean processes before serving. + """ self.logger.debug("Cleaning processes before serving") tmp_file = tmp_pid_file(self.model_id) dir_name = os.path.dirname(tmp_file) @@ -370,6 +374,9 @@ def clean_before_serving(self): self._kill_pids(pids) def clean_temp_dir(self): + """ + Clean the temporary directory. 
+ """ self.logger.debug("Cleaning temp dir") tmp_folder = tempfile.gettempdir() for d in os.listdir(tmp_folder): @@ -384,6 +391,9 @@ def clean_temp_dir(self): ) def clean_docker_containers(self): + """ + Clean Docker containers if necessary. + """ self.logger.debug("Silencing docker containers if necessary") dm = DockerManager(config_json=self.config_json) if dm.is_inside_docker(): @@ -394,6 +404,9 @@ def clean_docker_containers(self): dm.stop_containers(self.model_id) def serve(self): + """ + Serve the application. + """ self.clean_before_serving() self.clean_temp_dir() self.close() @@ -403,6 +416,9 @@ def serve(self): f.write("{0} {1}{2}".format(self.service.pid, self.service.url, os.linesep)) def close(self): + """ + Close the service. + """ tmp_file = tmp_pid_file(self.model_id) if os.path.isfile(tmp_file): pids = self._pids_from_file(tmp_file) diff --git a/ersilia/serve/environment/environment_variables.py b/ersilia/serve/environment/environment_variables.py index 078e1ec97..70468f130 100644 --- a/ersilia/serve/environment/environment_variables.py +++ b/ersilia/serve/environment/environment_variables.py @@ -4,9 +4,10 @@ import shutil import subprocess +from ... import ErsiliaBase + # from dotenv import load_dotenv from ...default import DOTENV_FILE -from ... import ErsiliaBase class GetEnvironmentVariable(ErsiliaBase): diff --git a/ersilia/serve/schema.py b/ersilia/serve/schema.py index c95670b5d..042a38cd2 100644 --- a/ersilia/serve/schema.py +++ b/ersilia/serve/schema.py @@ -1,9 +1,10 @@ -import os import json +import os + import numpy as np -from ..default import API_SCHEMA_FILE from .. 
import ErsiliaBase +from ..default import API_SCHEMA_FILE class ApiSchema(ErsiliaBase): diff --git a/ersilia/serve/services.py b/ersilia/serve/services.py index acb582e4a..7efa562d7 100644 --- a/ersilia/serve/services.py +++ b/ersilia/serve/services.py @@ -1,34 +1,41 @@ -import tempfile -import os +import importlib import json +import os import time -import importlib -import requests import uuid + import docker +import requests + from .. import ErsiliaBase, throw_ersilia_exception -from ..utils.terminal import run_command -from ..utils.ports import find_free_port -from ..utils.paths import resolve_pack_method from ..db.environments.localdb import EnvironmentDb from ..db.environments.managers import DockerManager +from ..default import ( + APIS_LIST_FILE, + CONTAINER_LOGS_TMP_DIR, + DEFAULT_VENV, + DOCKERHUB_LATEST_TAG, + DOCKERHUB_ORG, + INFORMATION_FILE, + IS_FETCHED_FROM_HOSTED_FILE, + PACK_METHOD_BENTOML, + PACK_METHOD_FASTAPI, + PACKMODE_FILE, +) +from ..setup.requirements.conda import CondaRequirement from ..setup.requirements.docker import DockerRequirement from ..utils.conda import SimpleConda, StandaloneConda from ..utils.docker import SimpleDocker -from ..utils.venv import SimpleVenv -from ..default import DEFAULT_VENV -from ..default import PACKMODE_FILE, APIS_LIST_FILE -from ..default import DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG, CONTAINER_LOGS_TMP_DIR -from ..default import IS_FETCHED_FROM_HOSTED_FILE -from ..default import INFORMATION_FILE -from ..default import PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI -from ..utils.session import get_session_dir from ..utils.exceptions_utils.serve_exceptions import ( BadGatewayError, DockerNotActiveError, ) from ..utils.logging import make_temp_dir -from ..setup.requirements.conda import CondaRequirement +from ..utils.paths import resolve_pack_method +from ..utils.ports import find_free_port +from ..utils.session import get_session_dir +from ..utils.terminal import run_command +from ..utils.venv import SimpleVenv 
SLEEP_SECONDS = 1 TIMEOUT_SECONDS = 1000 @@ -663,6 +670,14 @@ def __init__(self, model_id, config_json=None, preferred_port=None, url=None): @staticmethod def is_single_model_without_conda(): + """ + Check if there is a single model without conda. + + Returns + ------- + bool + True if conda is not installed, False otherwise. + """ conda_checker = CondaRequirement() # Returns True if conda is not installed and False otherwise return not conda_checker.is_installed() @@ -1265,9 +1280,9 @@ def is_url_available(self, url): try: response = requests.get(url, timeout=5) response.raise_for_status() - except requests.HTTPError as http_err: + except requests.HTTPError: return False - except Exception as err: + except Exception: return False else: return True @@ -1478,9 +1493,9 @@ def is_url_available(self, url): try: response = requests.get(url, timeout=5) response.raise_for_status() - except requests.HTTPError as http_err: + except requests.HTTPError: return False - except Exception as err: + except Exception: return False else: return True diff --git a/ersilia/serve/standard_api.py b/ersilia/serve/standard_api.py index 24affd609..11a66bafb 100644 --- a/ersilia/serve/standard_api.py +++ b/ersilia/serve/standard_api.py @@ -1,23 +1,23 @@ -import os +import asyncio import csv -import json import importlib -import requests -import asyncio +import json +import os + import nest_asyncio -from ..store.api import InferenceStoreApi -from ..store.utils import OutputSource +import requests + from .. 
import ErsiliaBase from ..default import ( + API_SCHEMA_FILE, + DEFAULT_API_NAME, EXAMPLE_STANDARD_INPUT_CSV_FILENAME, EXAMPLE_STANDARD_OUTPUT_CSV_FILENAME, -) -from ..default import ( INFORMATION_FILE, - API_SCHEMA_FILE, - DEFAULT_API_NAME, PREDEFINED_EXAMPLE_FILES, ) +from ..store.api import InferenceStoreApi +from ..store.utils import OutputSource MAX_INPUT_ROWS_STANDARD = 1000 @@ -46,7 +46,6 @@ class StandardCSVRunApi(ErsiliaBase): input_data = "path/to/input.csv" output_data = "path/to/output.csv" result = api.post(input_data, output_data) - print(result) """ def __init__(self, model_id, url, config_json=None): @@ -101,7 +100,7 @@ def _read_information_file(self): def _read_field_from_metadata(self, meta, field): if not meta: - self.logger.error(f"No metadata given") + self.logger.error("No metadata given") return None if "metadata" in meta and field in meta["metadata"]: return meta["metadata"][field] @@ -111,6 +110,14 @@ def _read_field_from_metadata(self, meta, field): self.logger.error(f"Neither 'metadata' nor 'card' contains '{field}' key.") def get_identifier_object_by_input_type(self): + """ + Get the identifier object by input type. + + Returns + ------- + object + The identifier object. 
+ """ identifier_module_path = "ersilia.utils.identifiers.{0}".format( self.input_type[0].lower() ) @@ -211,10 +218,13 @@ def get_expected_output_header(self): with open(file, "r") as f: reader = csv.reader(f) header = next(reader) - if header[0:2] != [ - "key", - "input", - ]: # Slicing doesn't raise an error even if the list does not have 2 elements + if ( + header[0:2] + != [ + "key", + "input", + ] + ): # Slicing doesn't raise an error even if the list does not have 2 elements header = ["key", "input"] + header return header except (FileNotFoundError, StopIteration): @@ -264,6 +274,19 @@ def parse_smiles_string(self, input): return [{"key": key, "input": input, "text": input}] def serialize_to_json_three_columns(self, input_data): + """ + Serialize data to JSON with three columns. + + Parameters + ---------- + input_data : str + The input data file path. + + Returns + ------- + list + The serialized JSON data. + """ json_data = [] with open(input_data, "r") as f: reader = csv.reader(f) @@ -274,6 +297,19 @@ def serialize_to_json_three_columns(self, input_data): return json_data def serialize_to_json_two_columns(self, input_data): + """ + Serialize data to JSON with two columns. + + Parameters + ---------- + input_data : str + The input data file path. + + Returns + ------- + list + The serialized JSON data. + """ json_data = [] with open(input_data, "r") as f: reader = csv.reader(f) @@ -284,6 +320,19 @@ def serialize_to_json_two_columns(self, input_data): return json_data def serialize_to_json_one_column(self, input_data): + """ + Serialize data to JSON with one column. + + Parameters + ---------- + input_data : str + The input data file path. + + Returns + ------- + list + The serialized JSON data. 
+ """ json_data = [] with open(input_data, "r") as f: reader = csv.reader(f) @@ -295,12 +344,38 @@ def serialize_to_json_one_column(self, input_data): return json_data async def async_serialize_to_json_one_column(self, input_data): + """ + Asynchronously serialize data to JSON with one column. + + Parameters + ---------- + input_data : str + The input data file path. + + Returns + ------- + list + The serialized JSON data. + """ smiles_list = self.get_list_from_csv(input_data) smiles_list = [smiles for smiles in smiles_list if self.validate_smiles(smiles)] json_data = await self.encoder.encode_batch(smiles_list) return json_data def get_list_from_csv(self, input_data): + """ + Get a list from a CSV file. + + Parameters + ---------- + input_data : str + The input data file path. + + Returns + ------- + list + The list of data from the CSV file. + """ smiles_list = [] with open(input_data, mode="r") as file: reader = csv.DictReader(file) diff --git a/ersilia/setup/baseconda.py b/ersilia/setup/baseconda.py index 4afdf0804..1f3f998e0 100644 --- a/ersilia/setup/baseconda.py +++ b/ersilia/setup/baseconda.py @@ -1,14 +1,13 @@ -import tempfile import os + from packaging import version +from .. import logger from ..utils.conda import SimpleConda +from ..utils.logging import make_temp_dir from ..utils.terminal import run_command from ..utils.versioning import Versioner from .utils.clone import ErsiliaCloner -from ..utils.logging import make_temp_dir - -from .. import logger class SetupBaseConda(object): diff --git a/ersilia/setup/basedocker.py b/ersilia/setup/basedocker.py index 0a615c09b..326427c2b 100644 --- a/ersilia/setup/basedocker.py +++ b/ersilia/setup/basedocker.py @@ -1,10 +1,10 @@ import os -import tempfile + +from .. import ErsiliaBase from ..utils.docker import SimpleDocker +from ..utils.logging import make_temp_dir from ..utils.versioning import Versioner -from .. 
import ErsiliaBase from .utils.clone import ErsiliaCloner -from ..utils.logging import make_temp_dir # TODO: Make sure it is used. diff --git a/ersilia/setup/conda.py b/ersilia/setup/conda.py index 3093ec79d..cc3c00c0b 100644 --- a/ersilia/setup/conda.py +++ b/ersilia/setup/conda.py @@ -1,6 +1,13 @@ class SetupConda(object): + """ + Class to handle conda setup. + """ + def __init__(self): pass def setup(self): + """ + Set up conda environment. + """ pass diff --git a/ersilia/setup/config.py b/ersilia/setup/config.py index 9c4758c61..4122eb2d8 100644 --- a/ersilia/setup/config.py +++ b/ersilia/setup/config.py @@ -1,14 +1,21 @@ import os -from ..default import EOS, CONFIG_JSON +from ..default import CONFIG_JSON, EOS from ..utils.config import Checker class SetupConfig(object): + """ + Class to handle configuration setup. + """ + def __init__(self): pass def setup(self): + """ + Set up configuration. + """ if self._is_done("config"): return if os.path.exists(os.path.join(EOS, CONFIG_JSON)): diff --git a/ersilia/setup/profile.py b/ersilia/setup/profile.py index 0d134ea01..3c8923b51 100644 --- a/ersilia/setup/profile.py +++ b/ersilia/setup/profile.py @@ -2,8 +2,15 @@ class SetupProfile(object): + """ + Class to handle profile setup. + """ + def __init__(self): pass def setup(self): + """ + Set up profile. + """ bashrc_cli_snippet() diff --git a/ersilia/setup/requirements/bentoml.py b/ersilia/setup/requirements/bentoml.py index d46e55b3a..8a0dde9a0 100644 --- a/ersilia/setup/requirements/bentoml.py +++ b/ersilia/setup/requirements/bentoml.py @@ -1,6 +1,6 @@ +import os import subprocess import sys -import os from ...default import EOS @@ -32,7 +32,7 @@ def is_installed(self) -> bool: True if BentoML is installed, False otherwise. 
""" try: - import bentoml + import bentoml # noqa: F401 return True except ImportError: diff --git a/ersilia/setup/requirements/compound.py b/ersilia/setup/requirements/compound.py index 763c7fcbf..09ccc354e 100644 --- a/ersilia/setup/requirements/compound.py +++ b/ersilia/setup/requirements/compound.py @@ -1,4 +1,5 @@ import importlib + from ...utils.terminal import run_command diff --git a/ersilia/setup/requirements/docker.py b/ersilia/setup/requirements/docker.py index b2adbac69..6eb24d3bf 100644 --- a/ersilia/setup/requirements/docker.py +++ b/ersilia/setup/requirements/docker.py @@ -1,6 +1,6 @@ -from ...utils.terminal import run_command_check_output from ...utils.docker import resolve_platform from ...utils.system import is_inside_docker +from ...utils.terminal import run_command_check_output class DockerRequirement(object): diff --git a/ersilia/setup/requirements/eospath.py b/ersilia/setup/requirements/eospath.py index 017b95e82..f6b21770f 100644 --- a/ersilia/setup/requirements/eospath.py +++ b/ersilia/setup/requirements/eospath.py @@ -1,7 +1,8 @@ +import os + from ... import throw_ersilia_exception from ...default import EOS from ...utils.exceptions_utils.setup_exceptions import EosHomePathNotFoundError -import os class EosHomePathRequirement(object): diff --git a/ersilia/setup/requirements/git.py b/ersilia/setup/requirements/git.py index aef467bfa..e4d235e4c 100644 --- a/ersilia/setup/requirements/git.py +++ b/ersilia/setup/requirements/git.py @@ -1,7 +1,7 @@ from ... 
import throw_ersilia_exception from ...utils.exceptions_utils.setup_exceptions import ( - GitLfsSetupError, GithubCliSetupError, + GitLfsSetupError, ) from ...utils.terminal import run_command, run_command_check_output diff --git a/ersilia/setup/requirements/isaura.py b/ersilia/setup/requirements/isaura.py index 954974239..6e4857c71 100644 --- a/ersilia/setup/requirements/isaura.py +++ b/ersilia/setup/requirements/isaura.py @@ -1,4 +1,5 @@ import importlib + from ...utils.terminal import run_command diff --git a/ersilia/setup/requirements/ping.py b/ersilia/setup/requirements/ping.py index 155e5920e..40cd6638e 100644 --- a/ersilia/setup/requirements/ping.py +++ b/ersilia/setup/requirements/ping.py @@ -1,6 +1,7 @@ +import requests + from ... import throw_ersilia_exception from ...utils.exceptions_utils.setup_exceptions import PingError -import requests class PingRequirement(object): diff --git a/ersilia/setup/utils/clone.py b/ersilia/setup/utils/clone.py index 4720fa788..c42c7fe13 100644 --- a/ersilia/setup/utils/clone.py +++ b/ersilia/setup/utils/clone.py @@ -1,9 +1,9 @@ -import shutil import os +import shutil from ... 
import ErsiliaBase -from ...utils.download import GitHubDownloader from ...utils.config import Checker +from ...utils.download import GitHubDownloader from ...utils.versioning import Versioner diff --git a/ersilia/store/api.py b/ersilia/store/api.py index 7f9a10fb2..6f6505c15 100644 --- a/ersilia/store/api.py +++ b/ersilia/store/api.py @@ -1,14 +1,16 @@ +import csv +import uuid + +import requests + from ersilia.core.base import ErsiliaBase +from ersilia.default import INFERENCE_STORE_API_URL from ersilia.io.input import GenericInputAdapter from ersilia.store.utils import ( - PrecalculationsNotInStore, PrecalculationsInStore, + PrecalculationsNotInStore, delete_file_upon_upload, ) -from ersilia.default import INFERENCE_STORE_API_URL -import requests -import uuid -import csv class InferenceStoreApi(ErsiliaBase): diff --git a/ersilia/store/utils.py b/ersilia/store/utils.py index b07465c4b..255a4738d 100644 --- a/ersilia/store/utils.py +++ b/ersilia/store/utils.py @@ -38,10 +38,36 @@ class OutputSource: @classmethod def is_local(cls, option): + """ + Check if the option is local. + + Parameters + ---------- + option : str + The option to check. + + Returns + ------- + bool + True if the option is local, False otherwise. + """ return option == cls.LOCAL_ONLY @classmethod def is_cloud(cls, option): + """ + Check if the option is cloud. + + Parameters + ---------- + option : str + The option to check. + + Returns + ------- + bool + True if the option is cloud, False otherwise. + """ return option == cls.CLOUD_ONLY @@ -60,6 +86,9 @@ def __init__(self, model_id): self.model_id = model_id def echo(self): + """ + Echo the message for model not found in inference store. + """ super()._echo( "Model {0} could not be found in inference store".format(self.model_id), fg="red", @@ -87,6 +116,9 @@ def __init__(self, model_id): self.model_id = model_id def echo(self): + """ + Echo the message for precalculations not found in inference store. 
+ """ super()._echo( "Precalculations for model {0} could not be found in inference store".format( self.model_id @@ -118,6 +150,14 @@ def __init__(self, model_id, output_url): self.output_url = output_url def echo(self): + """ + Echo the message for precalculations available for download. + + Parameters + ---------- + output_url : str + The URL for downloading the precalculations. + """ super()._echo( "Precalculations for model {0} are now available for download via this link (expires in 60 minutes): {1}".format( self.model_id, self.output_url diff --git a/ersilia/tools/bentoml/__init__.py b/ersilia/tools/bentoml/__init__.py index 1d6693b1a..067366203 100644 --- a/ersilia/tools/bentoml/__init__.py +++ b/ersilia/tools/bentoml/__init__.py @@ -1 +1 @@ -from .configuration import config +from .configuration import config as config diff --git a/ersilia/tools/bentoml/configuration/__init__.py b/ersilia/tools/bentoml/configuration/__init__.py index f0aec1dad..8d2d60b01 100644 --- a/ersilia/tools/bentoml/configuration/__init__.py +++ b/ersilia/tools/bentoml/configuration/__init__.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import logging +import os from functools import lru_cache from pathlib import Path -from bentoml import __version__, _version as version_mod -from bentoml.exceptions import BentoMLConfigException +from bentoml import __version__ +from bentoml import _version as version_mod from bentoml.configuration.configparser import BentoMLConfigParser +from bentoml.exceptions import BentoMLConfigException # Note this file is loaded prior to logging being configured, thus logger is only # used within functions in this file diff --git a/ersilia/tools/bentoml/configuration/configparser.py b/ersilia/tools/bentoml/configuration/configparser.py index 3d85cfc17..d71e305b3 100644 --- a/ersilia/tools/bentoml/configuration/configparser.py +++ b/ersilia/tools/bentoml/configuration/configparser.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import logging +import os from collections import OrderedDict from configparser import ConfigParser diff --git a/ersilia/tools/bentoml/exceptions.py b/ersilia/tools/bentoml/exceptions.py index 50efabf2a..c9889f2a2 100644 --- a/ersilia/tools/bentoml/exceptions.py +++ b/ersilia/tools/bentoml/exceptions.py @@ -1,6 +1,14 @@ class BentoMLException(Exception): + """ + Exception raised for errors in the BentoML tool. + """ + pass class BentoMLConfigException(Exception): + """ + Exception raised for configuration errors in the BentoML tool. 
+ """ + pass diff --git a/ersilia/tools/bentoml/types.py b/ersilia/tools/bentoml/types.py index 01916423f..eb5553783 100644 --- a/ersilia/tools/bentoml/types.py +++ b/ersilia/tools/bentoml/types.py @@ -19,11 +19,11 @@ Union, ) +from bentoml import config from multidict import CIMultiDict from werkzeug.formparser import parse_form_data from werkzeug.http import parse_options_header -from bentoml import config from .utils.dataclasses import json_serializer BATCH_HEADER = config("apiserver").get("batch_request_header") @@ -94,6 +94,14 @@ def path(self): @property def stream(self): + """ + Get the stream. + + Returns + ------- + object + The stream object. + """ if self._stream is not None: pass elif self.bytes_ is not None: @@ -105,16 +113,53 @@ def stream(self): return self._stream def read(self, size=-1): + """ + Read from the stream. + + Parameters + ---------- + size : int, optional + The number of bytes to read. Default is -1 (read all). + + Returns + ------- + bytes + The read bytes. + """ # TODO: also write to log return self.stream.read(size) def seek(self, pos): + """ + Seek to a position in the stream. + + Parameters + ---------- + pos : int + The position to seek to. + + Returns + ------- + int + The new position. + """ return self.stream.seek(pos) def tell(self): + """ + Tell the current position in the stream. + + Returns + ------- + int + The current position. + """ return self.stream.tell() def close(self): + """ + Close the stream. + """ if self._stream is not None: self._stream.close() @@ -151,30 +196,96 @@ class HTTPHeaders(CIMultiDict): @property def content_type(self) -> str: + """ + Get the content type. + + Returns + ------- + str + The content type. + """ return parse_options_header(self.get("content-type"))[0].lower() @property def charset(self) -> Optional[str]: + """ + Get the charset. + + Returns + ------- + Optional[str] + The charset, if available. 
+ """ return parse_options_header(self.get("content-type"))[1].get("charset", None) @property def content_encoding(self) -> str: + """ + Get the content encoding. + + Returns + ------- + str + The content encoding. + """ return parse_options_header(self.get("content-encoding"))[0].lower() @property def is_batch_input(self) -> bool: + """ + Check if the input is batch input. + + Returns + ------- + bool + True if the input is batch input, False otherwise. + """ hv = parse_options_header(self.get(BATCH_HEADER))[0].lower() return hv == "true" if hv else None @classmethod def from_dict(cls, d: Mapping[str, str]): + """ + Create an instance from a dictionary. + + Parameters + ---------- + d : Mapping[str, str] + The dictionary to create the instance from. + + Returns + ------- + object + The created instance. + """ return cls(d) @classmethod def from_sequence(cls, seq: Sequence[Tuple[str, str]]): + """ + Create an instance from a sequence. + + Parameters + ---------- + seq : Sequence[Tuple[str, str]] + The sequence to create the instance from. + + Returns + ------- + object + The created instance. + """ return cls(seq) def to_json(self): + """ + Convert the instance to JSON. + + Returns + ------- + tuple + The JSON representation of the instance. + """ return tuple(self.items()) @@ -204,6 +315,19 @@ def __post_init__(self): @classmethod def parse_form_data(cls, self): + """ + Parse form data. + + Parameters + ---------- + self : object + The object containing the form data. + + Returns + ------- + tuple + The parsed form data. + """ if not self.body: return None, None, {} environ = { @@ -220,12 +344,33 @@ def parse_form_data(cls, self): @classmethod def from_flask_request(cls, request): + """ + Create an instance from a Flask request. + + Parameters + ---------- + request : object + The Flask request object. + + Returns + ------- + object + The created instance. 
+ """ return cls( tuple((k, v) for k, v in request.headers.items()), request.get_data(), ) def to_flask_request(self): + """ + Convert the instance to a Flask request. + + Returns + ------- + object + The Flask request object. + """ from werkzeug.wrappers import Request return Request.from_values( @@ -237,6 +382,10 @@ def to_flask_request(self): @dataclass class HTTPResponse: + """ + Class representing an HTTP response. + """ + status: int = 200 headers: HTTPHeaders = HTTPHeaders() body: bytes = b"" @@ -250,6 +399,14 @@ def __post_init__(self): self.headers = HTTPHeaders.from_sequence(self.headers) def to_flask_response(self): + """ + Convert the instance to a Flask response. + + Returns + ------- + object + The Flask response object. + """ import flask return flask.Response( diff --git a/ersilia/tools/bentoml/utils/dataclasses.py b/ersilia/tools/bentoml/utils/dataclasses.py index 63d343d3d..7c81c4ebd 100644 --- a/ersilia/tools/bentoml/utils/dataclasses.py +++ b/ersilia/tools/bentoml/utils/dataclasses.py @@ -13,9 +13,8 @@ # limitations under the License. 
import json -from dataclasses import asdict +from dataclasses import asdict, is_dataclass from dataclasses import fields as get_fields -from dataclasses import is_dataclass class DataclassJsonEncoder(json.JSONEncoder): diff --git a/ersilia/utils/__init__.py b/ersilia/utils/__init__.py index e9e8c2c1b..bab4778b0 100644 --- a/ersilia/utils/__init__.py +++ b/ersilia/utils/__init__.py @@ -1,5 +1,5 @@ import os -from ..default import SESSIONS_DIR + from ..utils.session import get_session_dir diff --git a/ersilia/utils/conda.py b/ersilia/utils/conda.py index 0860f5253..43abdfcca 100644 --- a/ersilia/utils/conda.py +++ b/ersilia/utils/conda.py @@ -1,18 +1,18 @@ # TODO Implement conda python telemetry for resources during model execution -import os -import json import hashlib +import json +import os import shutil -from collections import defaultdict, OrderedDict -from .terminal import run_command, run_command_check_output -from .docker import SimpleDockerfileParser -from .versioning import Versioner -from .supp.conda_env_resolve import CHECKSUM_NCHAR, CHECKSUM_FILE +from collections import OrderedDict, defaultdict + +from .. import logger, throw_ersilia_exception from ..default import CONDA_ENV_YML_FILE -from .. import logger from ..utils.exceptions_utils.fetch_exceptions import ModelPackageInstallError from ..utils.logging import make_temp_dir -from .. import throw_ersilia_exception +from .docker import SimpleDockerfileParser +from .supp.conda_env_resolve import CHECKSUM_FILE, CHECKSUM_NCHAR +from .terminal import run_command, run_command_check_output +from .versioning import Versioner BASE = "base" SPECS_JSON = ".specs.json" @@ -366,11 +366,11 @@ def specs_from_dockerfile( The name or checksum of the specs. 
""" if use_checksum: - return self.checksum_from_dockerfile(dockerfile, dest) # TODO debug + return self.checksum_from_dockerfile(dockerfile_dir, dest) # TODO debug else: if dest is None: dest = dockerfile_dir - json_path = self.specs_from_dockerfile_as_json( + self.specs_from_dockerfile_as_json( dockerfile_dir, dest=dest ) # TODO remove? filename = os.path.join(dest, self.CHECKSUM_FILE) diff --git a/ersilia/utils/config.py b/ersilia/utils/config.py index f9cf1c577..9eac4aadd 100644 --- a/ersilia/utils/config.py +++ b/ersilia/utils/config.py @@ -1,14 +1,14 @@ -import os import json +import os + from ..default import ( - EOS, - GITHUB_ORG, - GITHUB_ERSILIA_REPO, CONFIG_JSON, CREDENTIALS_JSON, + EOS, + GITHUB_ERSILIA_REPO, + GITHUB_ORG, ) - SECRETS_JSON = "secrets.json" GDRIVE_CLIENT_SECRETS_JSON = "gdrive_client_secrets.json" ERSILIA_SECRETS_GITHUB_REPO = "ersilia-secrets" @@ -176,7 +176,7 @@ def __init__(self, json_file=None): if json_file is None: try: json_file = os.environ["EOS_CONFIG"] - except KeyError as err: + except KeyError: json_file = os.path.join(EOS, CONFIG_JSON) except Exception as err: raise err @@ -281,7 +281,7 @@ def __init__(self, json_file=None): if json_file is None: try: json_file = os.environ["EOS_CREDENTIALS"] - except KeyError as err: + except KeyError: json_file = os.path.join(EOS, CREDENTIALS_JSON) except Exception as err: raise err diff --git a/ersilia/utils/cron.py b/ersilia/utils/cron.py index bfa9c0645..8e6dc646f 100644 --- a/ersilia/utils/cron.py +++ b/ersilia/utils/cron.py @@ -1,10 +1,9 @@ -import json import csv +import json import time - -from ..hub.delete.delete import ModelFullDeleter from pathlib import Path +from ..hub.delete.delete import ModelFullDeleter # in days : model_usage_lim = 30 diff --git a/ersilia/utils/csvfile.py b/ersilia/utils/csvfile.py index eb092e458..dd08015c0 100644 --- a/ersilia/utils/csvfile.py +++ b/ersilia/utils/csvfile.py @@ -1,6 +1,6 @@ -import os import csv import json +import os class 
CsvDataLoader(object): diff --git a/ersilia/utils/docker.py b/ersilia/utils/docker.py index dfd9ce8cf..13e3a09b7 100644 --- a/ersilia/utils/docker.py +++ b/ersilia/utils/docker.py @@ -1,25 +1,24 @@ import os -import docker import subprocess import threading import time -import json -from dockerfile_parse import DockerfileParser -from .identifiers.long import LongIdentifier -from .terminal import run_command, run_command_check_output +import docker +from dockerfile_parse import DockerfileParser from .. import logger from ..default import ( DEFAULT_DOCKER_PLATFORM, DEFAULT_UDOCKER_USERNAME, - DOCKERHUB_ORG, DOCKERHUB_LATEST_TAG, + DOCKERHUB_ORG, PACK_METHOD_BENTOML, PACK_METHOD_FASTAPI, ) -from ..utils.system import SystemChecker from ..utils.logging import make_temp_dir +from ..utils.system import SystemChecker +from .identifiers.long import LongIdentifier +from .terminal import run_command, run_command_check_output def resolve_pack_method_docker(model_id): @@ -174,16 +173,16 @@ def containers(self, only_run): cnt_dict = {} with open(tmp_file, "r") as f: h = next(f) - cnt_idx = h.find("CONTAINER ID") + # cnt_idx = h.find("CONTAINER ID") img_idx = h.find("IMAGE") cmd_idx = h.find("COMMAND") - sts_idx = h.find("STATUS") - pts_idx = h.find("PORTS") + # sts_idx = h.find("STATUS") + # pts_idx = h.find("PORTS") nam_idx = h.find("NAMES") for l in f: - cnt = l[cnt_idx:img_idx].strip() + # cnt = l[cnt_idx:img_idx].strip() img = l[img_idx:cmd_idx].strip() - sts = l[sts_idx:pts_idx].strip() + # sts = l[sts_idx:pts_idx].strip() nam = l[nam_idx:].strip() cnt_dict[nam] = img return cnt_dict @@ -314,9 +313,7 @@ def run(self, org, img, tag, name, memory=None): run_command(cmd) else: # TODO - cmd = "sudo -u {0} udocker run {2} bash".format( - DEFAULT_UDOCKER_USERNAME, self._image_name(org, img, tag) - ) + cmd = "sudo -u {0} udocker run {2} bash".format(DEFAULT_UDOCKER_USERNAME) # noqa: F524 run_command(cmd) return name diff --git a/ersilia/utils/download.py 
b/ersilia/utils/download.py index 53393f25a..2a27c642b 100644 --- a/ersilia/utils/download.py +++ b/ersilia/utils/download.py @@ -1,20 +1,20 @@ """Download utilities""" import os -import zipfile -import requests import shutil +import subprocess +import sys import tempfile import uuid +import zipfile +from pathlib import Path + import requests -import sys -import subprocess -from click import echo -from .terminal import run_command -from .. import logger -from ..default import S3_BUCKET_URL, S3_BUCKET_URL_ZIP +from .. import logger +from ..default import S3_BUCKET_URL from ..utils.logging import make_temp_dir +from .terminal import run_command class PseudoDownloader(object): @@ -109,6 +109,19 @@ def __init__(self): @staticmethod def get_confirm_token(response): + """ + Get the confirmation token from the response. + + Parameters + ---------- + response : object + The response object. + + Returns + ------- + str + The confirmation token, if available. + """ for key, value in response.cookies.items(): if key.startswith("download_warning"): return value @@ -116,6 +129,16 @@ def get_confirm_token(response): @staticmethod def save_response_content(response, destination): + """ + Save the response content to a file. + + Parameters + ---------- + response : object + The response object. + destination : str + The destination file path. 
+ """ chunk_size = 32768 with open(destination, "wb") as f: for chunk in response.iter_content(chunk_size): @@ -240,38 +263,50 @@ def _list_lfs_files(self, destination): return clean_lfs_files_list def _download_s3_files(self, filename, repo, destination): - # This function takes S3 filename as input and tries to download it - # from a location given in S3_BUCKET_URL at default.py + file_url = f"{S3_BUCKET_URL}/{repo}/{filename}" + local_filename = Path(destination) / filename - file_url = S3_BUCKET_URL + "/" + repo + "/" + filename - local_filename = destination + "/" + filename try: - with requests.get(file_url, stream=True) as r: - r.raise_for_status() - dl = 0 - total_length = int(r.headers.get("content-length")) - if total_length is None: # no content length header - f.write(r.content) + response = requests.get(file_url, stream=True) + response.raise_for_status() + + total_length = response.headers.get("content-length") + total_length = int(total_length) if total_length else None + + local_filename.parent.mkdir(parents=True, exist_ok=True) + + with open(local_filename, "wb") as file: + if total_length is None: + # No content-length header, write content directly + file.write(response.content) else: - with open(local_filename, "wb") as f: - echo( - "Downloading large file {} from S3 bucket.".format(filename) - ) - for chunk in r.iter_content(chunk_size=8192): - dl += len(chunk) - f.write(chunk) - done = int(50 * dl / total_length) - sys.stdout.write( - "\r[%s%s]" % ("=" * done, " " * (50 - done)) - ) - sys.stdout.flush() - echo("✅\n") - except: + self._download_large_file(response, file, total_length, filename) + + self.logger.info( + f"✅ Successfully downloaded {filename} to {local_filename}" + ) + except requests.RequestException as e: self.logger.error( - "❗Could not download file {} from S3 bucket.\n We will try Git LFS.".format( - file_url - ) + f"❗ Could not download file {filename} from S3 bucket: {file_url}. " + "Falling back to Git LFS if available." 
) + self.logger.debug(f"Error details: {e}") + except Exception as e: + self.logger.error(f"❗ Unexpected error while downloading {filename}: {e}") + + def _download_large_file(self, response, file, total_length, filename): + self.logger.info(f"Downloading large file {filename} from S3 bucket.") + downloaded = 0 + + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + downloaded += len(chunk) + + done = int(50 * downloaded / total_length) + sys.stdout.write(f"\r[{'=' * done}{' ' * (50 - done)}]") + sys.stdout.flush() + + sys.stdout.write("\n") def _check_large_file_checksum(self, filename, destination): # This function takes filenames and checksums from lfs ls-files diff --git a/ersilia/utils/dvc.py b/ersilia/utils/dvc.py index bfe202701..345c89fa9 100644 --- a/ersilia/utils/dvc.py +++ b/ersilia/utils/dvc.py @@ -1,7 +1,9 @@ -from . import terminal -import h5py import os + +import h5py + from ..default import H5_DATA_FILE, ISAURA_GDRIVE, ISAURA_TEAM_GDRIVE +from . import terminal try: from pydrive2.auth import GoogleAuth diff --git a/ersilia/utils/environment.py b/ersilia/utils/environment.py index 3622da512..a66f05811 100644 --- a/ersilia/utils/environment.py +++ b/ersilia/utils/environment.py @@ -6,6 +6,10 @@ class Environment(object): + """ + Class to handle environment settings. 
+ """ + def __init__(self): self.python_packages = {dist.metadata["Name"] for dist in distributions()} diff --git a/ersilia/utils/exceptions_utils/api_exceptions.py b/ersilia/utils/exceptions_utils/api_exceptions.py index f80c3f001..7795698c2 100644 --- a/ersilia/utils/exceptions_utils/api_exceptions.py +++ b/ersilia/utils/exceptions_utils/api_exceptions.py @@ -1,3 +1,4 @@ +# ruff: noqa: D101, D102 from .exceptions import ErsiliaError diff --git a/ersilia/utils/exceptions_utils/base_information_exceptions.py b/ersilia/utils/exceptions_utils/base_information_exceptions.py index 3c0fe754f..01433ae53 100644 --- a/ersilia/utils/exceptions_utils/base_information_exceptions.py +++ b/ersilia/utils/exceptions_utils/base_information_exceptions.py @@ -1,7 +1,9 @@ import os -from .exceptions import ErsiliaError from ...default import AIRTABLE_MODEL_HUB_VIEW_URL +from .exceptions import ErsiliaError + +# ruff: noqa: D101, D102 def _read_default_fields(field): diff --git a/ersilia/utils/exceptions_utils/card_exceptions.py b/ersilia/utils/exceptions_utils/card_exceptions.py index 2f5343383..12aea8efe 100644 --- a/ersilia/utils/exceptions_utils/card_exceptions.py +++ b/ersilia/utils/exceptions_utils/card_exceptions.py @@ -1,7 +1,7 @@ -import os from .exceptions import ErsiliaError +# ruff: noqa: D101 class CardErsiliaError(ErsiliaError): def __init__(self): self.message = "Error occured while running card command" diff --git a/ersilia/utils/exceptions_utils/catalog_exceptions.py b/ersilia/utils/exceptions_utils/catalog_exceptions.py index d5baa3433..1828af0fa 100644 --- a/ersilia/utils/exceptions_utils/catalog_exceptions.py +++ b/ersilia/utils/exceptions_utils/catalog_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class CatalogErsiliaError(ErsiliaError): def __init__(self): diff --git a/ersilia/utils/exceptions_utils/clear_exceptions.py b/ersilia/utils/exceptions_utils/clear_exceptions.py index 869bd3a8c..e0a71b3b9 100644 --- 
a/ersilia/utils/exceptions_utils/clear_exceptions.py +++ b/ersilia/utils/exceptions_utils/clear_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class ClearErsiliaError(ErsiliaError): def __init__(self): diff --git a/ersilia/utils/exceptions_utils/close_exceptions.py b/ersilia/utils/exceptions_utils/close_exceptions.py index 42fbd0643..941f1e6b6 100644 --- a/ersilia/utils/exceptions_utils/close_exceptions.py +++ b/ersilia/utils/exceptions_utils/close_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class CloseErsiliaError(ErsiliaError): def __init__(self): diff --git a/ersilia/utils/exceptions_utils/delete_exceptions.py b/ersilia/utils/exceptions_utils/delete_exceptions.py index 23ec38792..38158c93a 100644 --- a/ersilia/utils/exceptions_utils/delete_exceptions.py +++ b/ersilia/utils/exceptions_utils/delete_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class ModelDeleteError(ErsiliaError): def __init__(self, model): diff --git a/ersilia/utils/exceptions_utils/example_exceptions.py b/ersilia/utils/exceptions_utils/example_exceptions.py index c3a7ac9f5..68f2052e9 100644 --- a/ersilia/utils/exceptions_utils/example_exceptions.py +++ b/ersilia/utils/exceptions_utils/example_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class ExampleErsiliaError(ErsiliaError): def __init__(self): diff --git a/ersilia/utils/exceptions_utils/exceptions.py b/ersilia/utils/exceptions_utils/exceptions.py index 4d34a4690..8f4e95032 100644 --- a/ersilia/utils/exceptions_utils/exceptions.py +++ b/ersilia/utils/exceptions_utils/exceptions.py @@ -1,8 +1,8 @@ -from ... import ErsiliaBase import os -import tempfile -from ...utils.terminal import run_command + +from ... 
import ErsiliaBase from ...utils.logging import make_temp_dir +from ...utils.terminal import run_command class ErsiliaError(Exception): @@ -23,7 +23,10 @@ class ErsiliaError(Exception): .. code-block:: python try: - raise ErsiliaError("An error occurred", hints="Check your configuration") + raise ErsiliaError( + "An error occurred", + hints="Check your configuration", + ) except ErsiliaError as e: print(e) @@ -47,6 +50,10 @@ def __init__( class MissingDependencyError(ErsiliaError): + """ + Exception raised for missing dependency errors. + """ + def __init__(self, dependency): self.dependency = dependency self.message = "Missing dependency {0}".format(self.dependency) @@ -55,6 +62,10 @@ def __init__(self, dependency): class NullModelIdentifierError(ErsiliaError): + """ + Exception raised for null model identifier errors. + """ + def __init__(self, model): self.model = model self.message = "Model identifier {0} is null".format(self.model) @@ -63,6 +74,10 @@ def __init__(self, model): class InvalidModelIdentifierError(ErsiliaError): + """ + Exception raised for invalid model identifier errors. + """ + def __init__(self, model): self.model = model self.message = "Could not identify model identifier or slug: {0}:".format( @@ -75,6 +90,10 @@ def __init__(self, model): class ModelNotAvailableLocallyError(ErsiliaError): + """ + Exception raised when the model is not available locally. + """ + def __init__(self, model): self.model = model self.message = ( @@ -88,6 +107,10 @@ def __init__(self, model): class EmptyOutputError(ErsiliaError): + """ + Exception raised for empty output errors. + """ + def __init__(self, model_id, api_name): self.model_id = model_id self.api_name = api_name @@ -100,6 +123,9 @@ def __init__(self, model_id, api_name): ErsiliaError.__init__(self, self.message, self.hints) def run_from_terminal(self): + """ + Run the error handling from the terminal. 
+ """ eb = ErsiliaBase() bundle_dir = eb._get_bundle_location(model_id=self.model_id) framework_dir = os.path.join( @@ -114,7 +140,9 @@ def run_from_terminal(self): output_file = os.path.join(framework_dir, "example_output.csv") tmp_folder = make_temp_dir(prefix="ersilia-") log_file = os.path.join(tmp_folder, "terminal.log") - run_command("ersilia example {0} -n 3 -f {1}".format(self.model_id, input_file)) + run_command( + "ersilia example inputs {0} -n 3 -f {1}".format(self.model_id, input_file) + ) cmd = "bash {0} {1} {2} {3} 2>&1 | tee -a {4}".format( exec_file, framework_dir, input_file, output_file, log_file ) diff --git a/ersilia/utils/exceptions_utils/fetch_exceptions.py b/ersilia/utils/exceptions_utils/fetch_exceptions.py index 82f338e30..442fff35f 100644 --- a/ersilia/utils/exceptions_utils/fetch_exceptions.py +++ b/ersilia/utils/exceptions_utils/fetch_exceptions.py @@ -1,6 +1,7 @@ -from typing import Any from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class FetchErsiliaError(ErsiliaError): def __init__(self, model_id): diff --git a/ersilia/utils/exceptions_utils/handle_undecorated_exception.py b/ersilia/utils/exceptions_utils/handle_undecorated_exception.py index 21b3bb923..7adc1cf22 100644 --- a/ersilia/utils/exceptions_utils/handle_undecorated_exception.py +++ b/ersilia/utils/exceptions_utils/handle_undecorated_exception.py @@ -1,9 +1,11 @@ # from . 
import echo -from ...utils.cli_query import query_yes_no -from ...utils.exceptions_utils.issue_reporting import send_exception_issue import sys + import click +from ...utils.cli_query import query_yes_no +from ...utils.exceptions_utils.issue_reporting import send_exception_issue + try: import emoji except: diff --git a/ersilia/utils/exceptions_utils/hubdata_exceptions.py b/ersilia/utils/exceptions_utils/hubdata_exceptions.py index f96903c8e..35567c342 100644 --- a/ersilia/utils/exceptions_utils/hubdata_exceptions.py +++ b/ersilia/utils/exceptions_utils/hubdata_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + # Note: Not really used anywhere right now except in the sanitize class class InvalidUrlInAirtableError(ErsiliaError): diff --git a/ersilia/utils/exceptions_utils/issue_reporting.py b/ersilia/utils/exceptions_utils/issue_reporting.py index db93a618b..8fd63f155 100644 --- a/ersilia/utils/exceptions_utils/issue_reporting.py +++ b/ersilia/utils/exceptions_utils/issue_reporting.py @@ -1,8 +1,9 @@ -import requests import json import subprocess from datetime import datetime +import requests + # REPO_OWNER = 'ersilia-os' # REPO_NAME = 'ersilia' REPO_OWNER = "azycn" diff --git a/ersilia/utils/exceptions_utils/pull_exceptions.py b/ersilia/utils/exceptions_utils/pull_exceptions.py index bfab68e3e..610893501 100644 --- a/ersilia/utils/exceptions_utils/pull_exceptions.py +++ b/ersilia/utils/exceptions_utils/pull_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class DockerImageNotAvailableError(ErsiliaError): def __init__(self, model): diff --git a/ersilia/utils/exceptions_utils/serve_exceptions.py b/ersilia/utils/exceptions_utils/serve_exceptions.py index ef19a37f5..64b4b2b3c 100644 --- a/ersilia/utils/exceptions_utils/serve_exceptions.py +++ b/ersilia/utils/exceptions_utils/serve_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class 
ServeErsiliaError(ErsiliaError): def __init__(self): diff --git a/ersilia/utils/exceptions_utils/setup_exceptions.py b/ersilia/utils/exceptions_utils/setup_exceptions.py index 09bb7771e..2c9071112 100644 --- a/ersilia/utils/exceptions_utils/setup_exceptions.py +++ b/ersilia/utils/exceptions_utils/setup_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class GitLfsSetupError(ErsiliaError): def __init__( diff --git a/ersilia/utils/exceptions_utils/test_exceptions.py b/ersilia/utils/exceptions_utils/test_exceptions.py index de55a1915..8150a9366 100644 --- a/ersilia/utils/exceptions_utils/test_exceptions.py +++ b/ersilia/utils/exceptions_utils/test_exceptions.py @@ -1,5 +1,7 @@ from .exceptions import ErsiliaError +# ruff: noqa: D101, D102 + class WrongCardIdentifierError(ErsiliaError): def __init__(self, model_id): diff --git a/ersilia/utils/exceptions_utils/throw_ersilia_exception.py b/ersilia/utils/exceptions_utils/throw_ersilia_exception.py index a8a8dd978..63aafe65c 100644 --- a/ersilia/utils/exceptions_utils/throw_ersilia_exception.py +++ b/ersilia/utils/exceptions_utils/throw_ersilia_exception.py @@ -1,8 +1,7 @@ import sys + import click -from ...utils.cli_query import query_yes_no -from ...utils.exceptions_utils.issue_reporting import send_exception_issue from ... import EOS from ...default import CURRENT_LOGGING_FILE, DEFAULT_ERSILIA_ERROR_EXIT_CODE diff --git a/ersilia/utils/identifiers/arbitrary.py b/ersilia/utils/identifiers/arbitrary.py index 5ca3e7164..c2ba9a9ed 100644 --- a/ersilia/utils/identifiers/arbitrary.py +++ b/ersilia/utils/identifiers/arbitrary.py @@ -2,10 +2,27 @@ class ArbitraryIdentifier(object): + """ + Class for handling arbitrary identifiers. + """ + def __init__(self): pass def encode(self, text: str) -> str: + """ + Encode the given text using MD5. + + Parameters + ---------- + text : str + The text to encode. + + Returns + ------- + str + The encoded text. 
+ """ return hashlib.md5(text.encode("utf-8")).hexdigest() diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py index 1d59a24fd..d745d2eb4 100644 --- a/ersilia/utils/identifiers/compound.py +++ b/ersilia/utils/identifiers/compound.py @@ -1,9 +1,11 @@ import asyncio -import nest_asyncio -import aiohttp import urllib.parse -import requests from functools import lru_cache + +import aiohttp +import nest_asyncio +import requests + from ..logging import logger try: @@ -11,8 +13,7 @@ except: unichem = None try: - from rdkit import Chem - from rdkit import RDLogger + from rdkit import Chem, RDLogger RDLogger.DisableLog("rdApp.*") except: @@ -231,7 +232,7 @@ async def _pubchem_smiles_to_inchikey(session, smiles): return None data = await response.json() return data["PropertyTable"]["Properties"][0]["InChIKey"] - except Exception as e: + except Exception: return None @staticmethod @@ -296,6 +297,20 @@ def convert_smiles_to_inchikey_with_rdkit(self, smiles): return None async def process_smiles(self, smiles, semaphore, session, result_list): + """ + Process a SMILES string asynchronously. + + Parameters + ---------- + smiles : str + The SMILES string to process. + semaphore : asyncio.Semaphore + The semaphore to limit concurrency. + session : aiohttp.ClientSession + The HTTP session for making requests. + result_list : list + The list to store results. + """ async with semaphore: # high performance resource manager inchikey = self.convert_smiles_to_inchikey_with_rdkit(smiles) @@ -315,6 +330,19 @@ async def process_smiles(self, smiles, semaphore, session, result_list): logger.info(f"No InChIKey found for SMILES {smiles}. Skipping.") async def encode_batch(self, smiles_list): + """ + Encode a batch of SMILES strings asynchronously. + + Parameters + ---------- + smiles_list : list + The list of SMILES strings to encode. + + Returns + ------- + list + The list of encoded results. 
+ """ result_list = [] semaphore = asyncio.Semaphore(self.concurrency_limit) async with aiohttp.ClientSession() as session: diff --git a/ersilia/utils/identifiers/long.py b/ersilia/utils/identifiers/long.py index 8da87a4e2..11cf5285a 100644 --- a/ersilia/utils/identifiers/long.py +++ b/ersilia/utils/identifiers/long.py @@ -1,6 +1,6 @@ try: import uuid -except ModuleNotFoundError as err: +except ModuleNotFoundError: uuid = None import random @@ -34,6 +34,7 @@ def encode(): """ if uuid is None: alphabet = ALPHABET.lower() + s = [] for n in PATTERN: s += ["".join([random.choice(alphabet) for _ in range(n)])] return "-".join(s) diff --git a/ersilia/utils/identifiers/model.py b/ersilia/utils/identifiers/model.py index 325f7e370..d805fc31c 100644 --- a/ersilia/utils/identifiers/model.py +++ b/ersilia/utils/identifiers/model.py @@ -1,6 +1,7 @@ +import json import random import string -import json + from ..paths import Paths from ..terminal import run_command_check_output diff --git a/ersilia/utils/identifiers/protein.py b/ersilia/utils/identifiers/protein.py index 84422d27b..f7701080c 100644 --- a/ersilia/utils/identifiers/protein.py +++ b/ersilia/utils/identifiers/protein.py @@ -9,9 +9,21 @@ class ProteinIdentifier(object): """ def __init__(self): - self.seguid = seguid + self.seguid = self.generate_seguid() self.uniprot = UniProt(verbose=False) + def generate_seguid(self): + """ + Generate a SEGUID for the protein. + + Returns + ------- + str + The generated SEGUID. + """ + # Implementation for generating SEGUID + pass + def sequence_from_uniprot(self, uniprot_ac): """ Returns protein sequence from UniProt identifier. 
diff --git a/ersilia/utils/identifiers/short.py b/ersilia/utils/identifiers/short.py index 70c6d1836..d4e035348 100644 --- a/ersilia/utils/identifiers/short.py +++ b/ersilia/utils/identifiers/short.py @@ -2,11 +2,11 @@ try: from hashids import Hashids -except ModuleNotFoundError as err: +except ModuleNotFoundError: Hashids = None try: from datetime import datetime -except ModuleNotFoundError as err: +except ModuleNotFoundError: datetime = None ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890" diff --git a/ersilia/utils/identifiers/timestamp.py b/ersilia/utils/identifiers/timestamp.py index 9ad15fe5b..12b4228ea 100644 --- a/ersilia/utils/identifiers/timestamp.py +++ b/ersilia/utils/identifiers/timestamp.py @@ -2,10 +2,22 @@ class TimeStampIdentifier(object): + """ + Class for handling timestamp identifiers. + """ + def __init__(self): self.stamp = datetime.now() def encode(self): + """ + Encode the current timestamp. + + Returns + ------- + str + The encoded timestamp. + """ return self.stamp.strftime("%Y%m%d%H%M%S") diff --git a/ersilia/utils/import.py b/ersilia/utils/import.py index 2f979a064..467f3fe11 100644 --- a/ersilia/utils/import.py +++ b/ersilia/utils/import.py @@ -5,5 +5,5 @@ def import_extra(mod): """Try to import a module, if not found return None""" try: return importlib.import_module(mod) - except ImportError as err: + except ImportError: return None diff --git a/ersilia/utils/installers.py b/ersilia/utils/installers.py index a97449f52..35bbf178b 100644 --- a/ersilia/utils/installers.py +++ b/ersilia/utils/installers.py @@ -1,17 +1,17 @@ -import shutil import os +import shutil import sys -import tempfile -from .conda import SimpleConda + +import click + +from .. import ErsiliaBase, check_install_status +from ..default import CONFIG_JSON, EOS from ..setup.baseconda import SetupBaseConda -from ..default import EOS, CONFIG_JSON -from .. import ErsiliaBase -from .. 
import check_install_status +from .conda import SimpleConda from .config import Checker +from .logging import make_temp_dir from .terminal import run_command from .versioning import Versioner -import click -from .logging import make_temp_dir INSTALL_LOG_FILE = ".install.log" @@ -182,9 +182,10 @@ def rdkit(self): if self._is_done("rdkit"): return try: - import rdkit + import importlib.util - exists = True + if importlib.util.find_spec("rdkit") is not None: + exists = True except ModuleNotFoundError: exists = False if exists: @@ -285,7 +286,6 @@ def server_docker(self): """ if self._is_done("server_docker"): return - import tempfile from .docker import SimpleDocker docker = SimpleDocker() diff --git a/ersilia/utils/logging.py b/ersilia/utils/logging.py index bedbccfc0..01b474187 100644 --- a/ersilia/utils/logging.py +++ b/ersilia/utils/logging.py @@ -1,15 +1,18 @@ +import json import os import sys -import json -from pathlib import Path import tempfile +from pathlib import Path + from loguru import logger -from ..default import LOGGING_FILE, CURRENT_LOGGING_FILE, VERBOSE_FILE -from ..utils.session import get_session_dir +from ..default import CURRENT_LOGGING_FILE, LOGGING_FILE, VERBOSE_FILE +from ..utils.session import get_session_dir ROTATION = "10 MB" +# ruff: noqa: D101, D102, F811 + def make_temp_dir(prefix): """ diff --git a/ersilia/utils/paths.py b/ersilia/utils/paths.py index d24a88888..2ef68e04c 100644 --- a/ersilia/utils/paths.py +++ b/ersilia/utils/paths.py @@ -1,18 +1,21 @@ -from dataclasses import dataclass, asdict -import re -import os import json +import os +import re +from dataclasses import asdict, dataclass +from pathlib import Path from typing import List, Optional + import yaml -from pathlib import Path + from ersilia import logger -from .docker import resolve_pack_method_docker + from ..default import ( - PACK_METHOD_BENTOML, - PACK_METHOD_FASTAPI, METADATA_JSON_FILE, METADATA_YAML_FILE, + PACK_METHOD_BENTOML, + PACK_METHOD_FASTAPI, ) +from .docker 
import resolve_pack_method_docker MODELS_DEVEL_DIRNAME = "models" @@ -215,6 +218,10 @@ def __post_init__(self): class ErsiliaMetadataLoader(yaml.SafeLoader): + """ + Custom YAML loader for Ersilia metadata. + """ + pass diff --git a/ersilia/utils/session.py b/ersilia/utils/session.py index 421c8bf63..1b88661a7 100644 --- a/ersilia/utils/session.py +++ b/ersilia/utils/session.py @@ -1,15 +1,16 @@ +import json import os import shutil + import psutil -import json from ..default import ( - SESSIONS_DIR, - LOGS_DIR, CONTAINER_LOGS_TMP_DIR, - SESSION_JSON, EOS, + LOGS_DIR, MODELS_JSON, + SESSION_JSON, + SESSIONS_DIR, ) diff --git a/ersilia/utils/supp/conda_env_resolve.py b/ersilia/utils/supp/conda_env_resolve.py index 6bd9cd6dd..fd8a63850 100644 --- a/ersilia/utils/supp/conda_env_resolve.py +++ b/ersilia/utils/supp/conda_env_resolve.py @@ -1,5 +1,5 @@ -import sys import os +import sys CHECKSUM_NCHAR = 8 CHECKSUM_FILE = ".conda_checksum" diff --git a/ersilia/utils/system.py b/ersilia/utils/system.py index cb580e585..df30279d9 100644 --- a/ersilia/utils/system.py +++ b/ersilia/utils/system.py @@ -1,5 +1,5 @@ -import platform import os +import platform def is_inside_docker(): diff --git a/ersilia/utils/terminal.py b/ersilia/utils/terminal.py index 0f8f9e91a..e26017ff1 100644 --- a/ersilia/utils/terminal.py +++ b/ersilia/utils/terminal.py @@ -1,21 +1,21 @@ -import os -import subprocess +import csv +import io import json -import tempfile +import os import shutil -import csv +import subprocess + from .logging import logger -import io try: - from inputimeout import inputimeout, TimeoutOccurred + from inputimeout import TimeoutOccurred, inputimeout except: inputimeout = None TimeoutOccurred = None from ..default import VERBOSE_FILE -from ..utils.session import get_session_dir from ..utils.logging import make_temp_dir +from ..utils.session import get_session_dir def is_quiet(): diff --git a/ersilia/utils/tracking.py b/ersilia/utils/tracking.py index e9311e9a6..9a2ed8b3f 100644 
--- a/ersilia/utils/tracking.py +++ b/ersilia/utils/tracking.py @@ -1,8 +1,8 @@ import json import os -from .session import get_session_dir, get_session_uuid -from .exceptions_utils.throw_ersilia_exception import throw_ersilia_exception +from .exceptions_utils.throw_ersilia_exception import throw_ersilia_exception +from .session import get_session_dir, get_session_uuid TRACKING_STUB = { "model_id": "", # ID of the model diff --git a/ersilia/utils/uninstall.py b/ersilia/utils/uninstall.py index 52ea7375c..4c458d794 100644 --- a/ersilia/utils/uninstall.py +++ b/ersilia/utils/uninstall.py @@ -2,9 +2,9 @@ import shutil import subprocess +from ..default import BENTOML_PATH, EOS from .conda import SimpleConda from .docker import SimpleDocker -from ..default import EOS, BENTOML_PATH from .logging import logger diff --git a/ersilia/utils/venv.py b/ersilia/utils/venv.py index 5fe8874a5..bf5c53250 100644 --- a/ersilia/utils/venv.py +++ b/ersilia/utils/venv.py @@ -1,17 +1,13 @@ -import tempfile import os import shutil -from .. import ErsiliaBase - -from .terminal import run_command +from .. import ErsiliaBase, logger, throw_ersilia_exception from ..utils.exceptions_utils.fetch_exceptions import ( - VirtualEnvironmentSetupError, ModelPackageInstallError, + VirtualEnvironmentSetupError, ) -from .. import throw_ersilia_exception -from .. 
import logger from ..utils.logging import make_temp_dir +from .terminal import run_command class SimpleVenv(ErsiliaBase): diff --git a/ersilia/utils/versioning.py b/ersilia/utils/versioning.py index f044b5d8a..966c05e92 100644 --- a/ersilia/utils/versioning.py +++ b/ersilia/utils/versioning.py @@ -1,5 +1,5 @@ -import sys import os +import sys try: from bentoml import __version__ as __bentoml_version__ diff --git a/ersilia/utils/zip.py b/ersilia/utils/zip.py index 71082f791..8c09b3882 100644 --- a/ersilia/utils/zip.py +++ b/ersilia/utils/zip.py @@ -1,6 +1,6 @@ -import zipfile import os import shutil +import zipfile class Zipper(object): diff --git a/pyproject.toml b/pyproject.toml index 668eec33a..1d23352cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,12 +63,14 @@ scipy = { version = "<=1.10.0", optional = true } rdkit-pypi = { version = "*", optional = true } nox = { version = "*", optional = true } rich = { version = "*", optional = true } +ruff = { version = "*", optional = true } +pre-commit = { version = "*", optional = true } [tool.poetry.extras] # Instead of using poetry dependency groups, we use extras to make it pip installable lake = ["isaura"] docs = ["sphinx", "jinja2"] -test = ["pytest", "pytest-asyncio", "pytest-benchmark", "nox", "rich", "fuzzywuzzy", "scipy", "rdkit-pypi"] +test = ["pytest", "pytest-asyncio", "pytest-benchmark", "nox", "rich", "fuzzywuzzy", "scipy", "rdkit-pypi", "ruff", "pre-commit"] #all = [lake, docs, test] [tool.poetry.scripts] diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 000000000..61357243c --- /dev/null +++ b/ruff.toml @@ -0,0 +1,53 @@ +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".github", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "test", + "node_modules", + 
"site-packages", + "venv", +] + +line-length = 88 +indent-width = 4 + +target-version = "py310" + +[lint] +select = ["D101", "D102", "E4", "E9", "F", "I", "W"] +ignore = ["D104", "D105", "D107"] +fixable = ["ALL", "D"] +unfixable = [] +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[lint.pydocstyle] +convention = "numpy" + +[format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" +docstring-code-format = true +docstring-code-line-length = 50 \ No newline at end of file diff --git a/test/cli/test_catalog.py b/test/cli/test_catalog.py index 7cb3949e2..7e81fb082 100644 --- a/test/cli/test_catalog.py +++ b/test/cli/test_catalog.py @@ -1,6 +1,8 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock from click.testing import CliRunner + from ersilia.cli.commands.catalog import catalog_cmd diff --git a/test/cli/test_close.py b/test/cli/test_close.py index 35c2dc50d..1f37f934a 100644 --- a/test/cli/test_close.py +++ b/test/cli/test_close.py @@ -1,12 +1,16 @@ import pytest -from unittest import TestCase -from unittest.mock import patch, MagicMock, AsyncMock + from click.testing import CliRunner -from ersilia.core.model import ErsiliaModel + from ersilia.cli.commands.close import close_cmd +from ersilia.core.model import ErsiliaModel from ersilia.core.session import Session +from unittest.mock import patch, AsyncMock MODEL_ID = "eos3b5e" +URL = "http://localhost" +PORT = 8001 +API_NAME = "run" @pytest.fixture @@ -15,6 +19,39 @@ def mock_close(): yield mock_close_ +@pytest.fixture +def mock_fetcher(): + with patch( + "ersilia.hub.fetch.fetch.ModelFetcher.fetch", new_callable=AsyncMock + ) as mock_fetch: + yield mock_fetch + + +@pytest.fixture +def mock_set_apis(): + with patch.object(ErsiliaModel, "_set_apis", return_value=None) as mock_set_apis: + yield mock_set_apis + + +@pytest.fixture +def mock_convn_api_get_apis(): + def 
mock_get_api_side_effect(): + return [API_NAME] + + with patch.object( + ErsiliaModel, "get_apis", side_effect=mock_get_api_side_effect + ) as mock_get_apis: + yield mock_get_apis + + +@pytest.fixture +def mock_get_url(): + with patch.object( + ErsiliaModel, "_get_url", return_value=f"{URL}:{PORT}" + ) as mock_url: + yield mock_url + + @pytest.fixture def mock_session(): with ( @@ -26,11 +63,17 @@ def mock_session(): yield -def test_close_cmd(mock_close, mock_session): +def test_close_cmd( + mock_close, + mock_fetcher, + mock_session, + mock_set_apis, + mock_convn_api_get_apis, + mock_get_url, +): runner = CliRunner() result = runner.invoke(close_cmd()) assert result.exit_code == 0 - # assert mock_fetcher.called assert mock_close.called diff --git a/test/cli/test_delete.py b/test/cli/test_delete.py index 89fedb25f..1d78cab23 100644 --- a/test/cli/test_delete.py +++ b/test/cli/test_delete.py @@ -1,41 +1,79 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock from click.testing import CliRunner + from ersilia.cli.commands.delete import delete_cmd -from ersilia.utils.logging import logger +from ersilia.hub.delete.delete import ModelFullDeleter + +MODEL = "eos3b5e" +DUMMY_MODEL = ["eosxxxx"] @pytest.fixture -def runner(): - return CliRunner() +def can_be_deleted(): + with patch.object( + ModelFullDeleter, "can_be_deleted", return_value=(True, "") + ) as can_be_deleted: + yield can_be_deleted + + +@patch("ersilia.ModelBase") +@patch("ersilia.hub.delete.delete.ModelFullDeleter") +@patch("ersilia.cli.echo") +def test_delete_specific_model(mock_echo, mock_deleter, mock_modelbase, can_be_deleted): + mock_modelbase_instance = MagicMock() + mock_modelbase_instance.model_id = MODEL # + mock_modelbase.return_value = mock_modelbase_instance + mock_deleter_instance = MagicMock() + mock_deleter_instance.can_be_deleted.return_value = (False, "") + mock_deleter_instance.delete.return_value = None + mock_deleter.return_value = 
mock_deleter_instance -MODEL_ID = "eos3b5e" + mock_echo.return_value = None + runner = CliRunner() + result = runner.invoke(delete_cmd(), [MODEL]) -@patch("ersilia.core.modelbase.ModelBase") -@pytest.mark.parametrize("model", [(MODEL_ID)]) -def test_delete_model( - mock_model_base, - runner, - model, -): - mock_model_instance = MagicMock() - mock_model_instance.model_id = model - mock_model_base.return_value = mock_model_instance - mock_model_instance.invoke.return_value.exit_code = 0 - mock_model_instance.invoke.return_value.output = f"Deleting model {model}: \n👍 Model {model}\ - deleting cmd successfully executed!\n" + assert ( + result.exit_code == 0 + ), f"Unexpected exit code: {result.exit_code}. Output: {result.output}" - result = runner.invoke(delete_cmd(), [model]) - logger.info(result.output) +@patch("ersilia.hub.content.catalog.ModelCatalog") +@patch("ersilia.hub.delete.delete.ModelFullDeleter") +@patch("ersilia.cli.echo") +def test_delete_all_models(mock_echo, mock_deleter, mock_catalog): + runner = CliRunner() + + mock_catalog_instance = MagicMock() + mock_catalog_instance.local.return_value.data = [[MODEL], [DUMMY_MODEL]] + mock_catalog_instance.local.return_value.columns = ["Identifier"] + mock_catalog.return_value = mock_catalog_instance + + mock_deleter_instance = MagicMock() + mock_deleter_instance.can_be_deleted.return_value = (True, "") + mock_deleter.return_value = mock_deleter_instance + + result = runner.invoke(delete_cmd(), ["delete", "--all"]) assert ( result.exit_code == 0 ), f"Unexpected exit code: {result.exit_code}. 
Output: {result.output}" -if __name__ == "__main__": +@patch("ersilia.hub.content.catalog.ModelCatalog") +@patch("ersilia.cli.echo") +def test_no_models_available(mock_echo, mock_catalog): runner = CliRunner() - test_delete_model(None, None, runner, MODEL_ID) + + mock_catalog_instance = MagicMock() + mock_catalog_instance.local.return_value = None + mock_catalog.return_value = mock_catalog_instance + + result = runner.invoke(delete_cmd(), ["delete", "--all"]) + + assert ( + result.exit_code == 0 + ), f"Unexpected exit code: {result.exit_code}. Output: {result.output}" diff --git a/test/cli/test_fetch.py b/test/cli/test_fetch.py index aa18f0ed0..af2cde09d 100644 --- a/test/cli/test_fetch.py +++ b/test/cli/test_fetch.py @@ -1,6 +1,8 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock from click.testing import CliRunner + from ersilia.cli.commands.fetch import fetch_cmd from ersilia.hub.fetch.fetch import FetchResult from ersilia.utils.logging import logger diff --git a/test/cli/test_run.py b/test/cli/test_run.py index e5e8b82f9..4fc77fb44 100644 --- a/test/cli/test_run.py +++ b/test/cli/test_run.py @@ -1,14 +1,16 @@ -import pytest import random import time -from unittest.mock import patch, Mock, AsyncMock, PropertyMock +from unittest.mock import AsyncMock, patch + +import pytest from click.testing import CliRunner + from ersilia.cli.commands.run import run_cmd -from ersilia.serve.standard_api import StandardCSVRunApi -from ersilia.core.session import Session from ersilia.core.model import ErsiliaModel +from ersilia.core.session import Session +from ersilia.serve.standard_api import StandardCSVRunApi from ersilia.utils.logging import logger -from ersilia.hub.fetch.fetch import ModelFetcher + from .utils import create_compound_input_csv URL = "http://localhost" diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py index 88c7a5a2d..3efb5638c 100644 --- a/test/cli/test_serve.py +++ b/test/cli/test_serve.py @@ 
-1,19 +1,20 @@ +from unittest.mock import MagicMock, patch + import pytest -from unittest import TestCase -from unittest.mock import patch, MagicMock, AsyncMock from click.testing import CliRunner -from ersilia.core.model import ErsiliaModel + from ersilia.cli.commands.serve import serve_cmd -from ersilia.store.utils import store_has_model +from ersilia.core.model import ErsiliaModel +from ersilia.hub.fetch.register.standard_example import ModelStandardExample URL = "http://localhost" MODEL_ID = "eos3b5e" @pytest.fixture -def mock_set_apis(): - with patch.object(ErsiliaModel, "_set_apis", return_value=None) as mock_set_apis: - yield mock_set_apis +def mock_std_example(): + with patch.object(ModelStandardExample, "run", return_value=None) as mock_run: + yield mock_run @pytest.fixture @@ -24,12 +25,15 @@ def mock_serve(): @patch("ersilia.core.model.ErsiliaModel") @patch("ersilia.store.utils.store_has_model", return_value=False) -def test_serve_cmd(mock_store_has_model, mock_ersilia_model, mock_set_apis, mock_serve): +def test_serve_cmd( + mock_store_has_model, mock_ersilia_model, mock_serve, mock_std_example +): runner = CliRunner() mock_mdl_instance = MagicMock() mock_mdl_instance.is_valid.return_value = True mock_mdl_instance.url = URL - mock_mdl_instance.model_id = MODEL_ID + mock_mdl_instance.model = MODEL_ID + mock_mdl_instance.service_class = MODEL_ID mock_mdl_instance.slug = "molecular-weight" mock_mdl_instance.pid = 1234 mock_mdl_instance.scl = "pulled_docker" diff --git a/test/models/eos0t01/pack.py b/test/models/eos0t01/pack.py index 172416e13..cae8e9020 100644 --- a/test/models/eos0t01/pack.py +++ b/test/models/eos0t01/pack.py @@ -1,6 +1,5 @@ from src.service import Service - service = Service() service.pack("model", None) service.save() diff --git a/test/models/eos0t01/src/service.py b/test/models/eos0t01/src/service.py index 8f8515581..cfa63874f 100644 --- a/test/models/eos0t01/src/service.py +++ b/test/models/eos0t01/src/service.py @@ -1,11 +1,10 @@ 
import random - from typing import List from bentoml import BentoService, api, artifacts -from bentoml.types import JsonSerializable from bentoml.adapters import JsonInput from bentoml.service.artifacts.common import JSONArtifact +from bentoml.types import JsonSerializable @artifacts([JSONArtifact("model")]) diff --git a/test/models/eos0t02/pack.py b/test/models/eos0t02/pack.py index 172416e13..cae8e9020 100644 --- a/test/models/eos0t02/pack.py +++ b/test/models/eos0t02/pack.py @@ -1,6 +1,5 @@ from src.service import Service - service = Service() service.pack("model", None) service.save() diff --git a/test/models/eos0t02/src/service.py b/test/models/eos0t02/src/service.py index 1044b849c..94c27be39 100644 --- a/test/models/eos0t02/src/service.py +++ b/test/models/eos0t02/src/service.py @@ -1,13 +1,11 @@ -import random -import json import collections -from pysmiles import read_smiles from typing import List from bentoml import BentoService, api, artifacts from bentoml.adapters import JsonInput from bentoml.service.artifacts.common import JSONArtifact from bentoml.types import JsonSerializable +from pysmiles import read_smiles @artifacts([JSONArtifact("model")]) diff --git a/test/models/eos0t03/pack.py b/test/models/eos0t03/pack.py index 172416e13..cae8e9020 100644 --- a/test/models/eos0t03/pack.py +++ b/test/models/eos0t03/pack.py @@ -1,6 +1,5 @@ from src.service import Service - service = Service() service.pack("model", None) service.save() diff --git a/test/models/eos0t03/src/service.py b/test/models/eos0t03/src/service.py index dcbdf73ff..5b573c7af 100644 --- a/test/models/eos0t03/src/service.py +++ b/test/models/eos0t03/src/service.py @@ -1,14 +1,11 @@ -import random -import json -import collections -from rdkit import Chem -from rdkit.Chem import Descriptors from typing import List from bentoml import BentoService, api, artifacts from bentoml.adapters import JsonInput from bentoml.service.artifacts.common import JSONArtifact from bentoml.types import 
JsonSerializable +from rdkit import Chem +from rdkit.Chem import Descriptors @artifacts([JSONArtifact("model")]) diff --git a/test/models/eos0t04/model/framework/code/main.py b/test/models/eos0t04/model/framework/code/main.py index f351f0ddf..c081ed24a 100644 --- a/test/models/eos0t04/model/framework/code/main.py +++ b/test/models/eos0t04/model/framework/code/main.py @@ -1,7 +1,7 @@ # imports -import os import csv import json +import os import sys # parse arguments diff --git a/test/models/eos0t04/pack.py b/test/models/eos0t04/pack.py index 9ddfeba7b..75a8e2573 100644 --- a/test/models/eos0t04/pack.py +++ b/test/models/eos0t04/pack.py @@ -1,7 +1,6 @@ import os -from src.service import load_model -from src.service import Service -from src.service import CHECKPOINTS_BASEDIR, FRAMEWORK_BASEDIR + +from src.service import CHECKPOINTS_BASEDIR, FRAMEWORK_BASEDIR, Service, load_model root = os.path.dirname(os.path.realpath(__file__)) mdl = load_model( diff --git a/test/models/eos0t04/src/service.py b/test/models/eos0t04/src/service.py index db11cd393..13b349c0e 100644 --- a/test/models/eos0t04/src/service.py +++ b/test/models/eos0t04/src/service.py @@ -1,17 +1,15 @@ +import json +import os +import pickle +import shutil +import subprocess +import tempfile from typing import List from bentoml import BentoService, api, artifacts from bentoml.adapters import JsonInput -from bentoml.types import JsonSerializable from bentoml.service import BentoServiceArtifact - -import pickle -import os -import shutil -import tempfile -import subprocess -import json - +from bentoml.types import JsonSerializable CHECKPOINTS_BASEDIR = "checkpoints" FRAMEWORK_BASEDIR = "framework" diff --git a/test/playground/commands.py b/test/playground/commands.py index 5570ba6e7..18bc201d4 100644 --- a/test/playground/commands.py +++ b/test/playground/commands.py @@ -1,14 +1,14 @@ -import pytest import subprocess import time +from pathlib import Path + import psutil -import re -import json +import pytest 
import yaml from rich.text import Text -from pathlib import Path -from .shared import results + from .rules import get_rule +from .shared import results from .utils import ( create_compound_input_csv, get_command_names, diff --git a/test/playground/noxfile.py b/test/playground/noxfile.py index 7ad263cd2..0731dc18b 100644 --- a/test/playground/noxfile.py +++ b/test/playground/noxfile.py @@ -1,7 +1,9 @@ -import nox -import yaml import shutil from pathlib import Path + +import nox +import yaml + from ersilia.utils.logging import logger ORIGINAL_DIR = Path.cwd() @@ -97,7 +99,7 @@ def test_fetch_multiple_models(session): "fetch_flags": "--from_dockerhub", } ) - logger.info(f"Fetching and Serving Multiple Models: Fetching") + logger.info("Fetching and Serving Multiple Models: Fetching") session.run("pytest", "commands.py", "-v", silent=False) @@ -107,7 +109,7 @@ def test_serve_multiple_models(session): update_yaml_values( {"runner": "multiple", "cli_type": "serve", "delete_model": False} ) - logger.info(f"Fetching and Serving Multiple Models: Serving") + logger.info("Fetching and Serving Multiple Models: Serving") session.run("pytest", "commands.py", "-v", silent=False) @@ -125,5 +127,5 @@ def test_conventional_run(session): "delete_model": True, } ) - logger.info(f"Standard and Conventional Run: Conventional") + logger.info("Standard and Conventional Run: Conventional") session.run("pytest", "commands.py", "-v", silent=False) diff --git a/test/playground/rules.py b/test/playground/rules.py index fd6a24e1e..943447069 100644 --- a/test/playground/rules.py +++ b/test/playground/rules.py @@ -1,5 +1,5 @@ -import json import csv +import json from pathlib import Path RULE_REGISTRY = {} diff --git a/test/playground/runner.py b/test/playground/runner.py index 6e2c0b43b..4c92dc485 100644 --- a/test/playground/runner.py +++ b/test/playground/runner.py @@ -1,7 +1,8 @@ import subprocess -import yaml from pathlib import Path +import yaml + class NoxSession: def __init__(self, name): 
diff --git a/test/test_catalog.py b/test/test_catalog.py index 6bdba8d2f..df6661210 100644 --- a/test/test_catalog.py +++ b/test/test_catalog.py @@ -1,6 +1,8 @@ import json import os + import pytest + from ersilia.hub.content.catalog import CatalogTable diff --git a/test/test_content.py b/test/test_content.py index 05bdc3f7a..b0150f78b 100644 --- a/test/test_content.py +++ b/test/test_content.py @@ -1,5 +1,5 @@ -from ersilia.hub.content.catalog import ModelCatalog from ersilia.hub.content.card import ModelCard +from ersilia.hub.content.catalog import ModelCatalog MODEL_ID = "eos0t01" diff --git a/test/test_inputs.py b/test/test_inputs.py index 8d0efb667..16413723c 100644 --- a/test/test_inputs.py +++ b/test/test_inputs.py @@ -1,18 +1,19 @@ import os import sys + from ersilia.io.input import GenericInputAdapter root = os.path.abspath(os.path.dirname(__file__)) inputs_path = os.path.abspath(os.path.join(root, "inputs")) sys.path.append(inputs_path) -from compound_single import smiles as compound_single_input -from compound_singles import smiles as compound_singles_input from compound_list import smiles_list as compound_list_input from compound_lists import smiles_lists as compound_lists_input from compound_pair_of_lists import smiles_pair_of_lists as compound_pair_of_lists_input from compound_pairs_of_lists import ( smiles_pairs_of_lists as compound_pairs_of_lists_input, ) +from compound_single import smiles as compound_single_input +from compound_singles import smiles as compound_singles_input def test_compound_single(): diff --git a/test/test_models.py b/test/test_models.py index 91e1dd490..ad71b0a4a 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,10 +1,12 @@ import asyncio -import pytest import random -from unittest.mock import patch, AsyncMock -from ersilia.hub.fetch.fetch import ModelFetcher +from unittest.mock import AsyncMock, patch + +import pytest + from ersilia import ErsiliaModel from ersilia.core.session import Session +from 
ersilia.hub.fetch.fetch import ModelFetcher MODELS = ["eos0t01", "eos3b5e", "eos0t03", "eos0t04"] RESULTS = [0, 312.89, 0, 0] diff --git a/test/test_url_search.py b/test/test_url_search.py index 46d053edc..50e92a49a 100644 --- a/test/test_url_search.py +++ b/test/test_url_search.py @@ -1,4 +1,5 @@ import pytest + from ersilia.db.hubdata.interfaces import JsonModelsInterface ji = JsonModelsInterface(config_json=None)