From cd25fdc979092ed1b6e8604bd7e5420afabb3dc4 Mon Sep 17 00:00:00 2001 From: Ivan Murabito Date: Wed, 22 Jan 2025 09:46:42 +0000 Subject: [PATCH 1/4] feat: switch to ruff --- .pre-commit-config.yaml | 39 ++++++----------------- .vscode/settings.json | 5 ++- Makefile | 12 ++++--- notebooks/playground.ipynb | 64 +++++++++++++++++++++----------------- pyproject.toml | 25 ++++++++++++--- 5 files changed, 76 insertions(+), 69 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d6aeb3..1b81e60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,12 +4,10 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - - id: check-ast - id: check-toml - id: check-yaml args: - --unsafe - #- id: check-json - id: check-case-conflict - id: check-merge-conflict - id: end-of-file-fixer @@ -17,36 +15,17 @@ repos: - id: detect-private-key - id: requirements-txt-fixer - # - repo: https://github.com/pre-commit/mirrors-mypy - # rev: v0.991 - # hooks: - # - id: mypy - # args: [--ignore-missing-imports, --pretty, --show-error-codes] - - repo: https://github.com/nbQA-dev/nbQA # run lint on notebook - rev: 1.9.1 - hooks: - - id: nbqa-black - - id: nbqa-pyupgrade - # - id: nbqa-isort - - repo: https://github.com/asottile/pyupgrade - rev: v3.19.1 - hooks: - - id: pyupgrade - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort (python) - args: [--profile, black] - - repo: https://github.com/psf/black - rev: 24.10.0 - hooks: - - id: black - repo: https://github.com/kynan/nbstripout rev: 0.6.0 hooks: - id: nbstripout - - repo: https://github.com/hadialqattan/pycln # remove unused import - rev: v2.5.0 + + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.9.2 hooks: - - id: pycln + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. 
+ - id: ruff-format diff --git a/.vscode/settings.json b/.vscode/settings.json index 7f5c3d9..04af413 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,5 +5,8 @@ "tests" ], "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff" + }, } diff --git a/Makefile b/Makefile index 4a7d562..f0b8550 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: test install install-dev install-pre-commit run-pre-commit .uv .pre-commit tox +.PHONY: venv test install install-dev install-pre-commit run-pre-commit .uv .pre-commit tox .uv: ## Check that uv is installed @uv --version || echo 'Please install uv: https://docs.astral.sh/uv/getting-started/installation/' @@ -6,19 +6,21 @@ .pre-commit: ## Check that pre-commit is installed @pre-commit -V || echo 'Please install pre-commit: https://pre-commit.com/' +venv: + @uv venv --python=python3.12 + install: .uv .pre-commit - @uv venv @uv pip install -e ".[cpu,dev]" @pre-commit install install-gpu: .uv .pre-commit - @uv venv @uv pip install -e ".[dev,gpu]" @pre-commit install lint: - @isort ./focoos ./tests --profile=black - @black ./focoos ./tests + @ruff check ./corpus ./tests ./ops ./ray_apps --fix + @ruff format ./corpus ./tests ./ops ./ray_apps + run-pre-commit: .pre-commit @pre-commit run --all-files diff --git a/notebooks/playground.ipynb b/notebooks/playground.ipynb index fcce448..186ed49 100644 --- a/notebooks/playground.ipynb +++ b/notebooks/playground.ipynb @@ -29,9 +29,10 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos import Focoos, DEV_API_URL, LOCAL_API_URL\n", - "from pprint import pprint\n", "import os\n", + "from pprint import pprint\n", + "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", @@ -51,11 +52,12 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import Focoos, DEV_API_URL\n", + "from pprint import pprint\n", + "\n", "from supervision import plot_image\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "model_ref = \"focoos_object365\"\n", "image_path = \"./assets/ade_val_034.jpg\"\n", @@ -111,10 +113,11 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "import os\n", "from pprint import pprint\n", + "\n", "from supervision import plot_image\n", + "\n", "from focoos import Focoos\n", "\n", "focoos = Focoos(\n", @@ -149,12 +152,12 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from focoos import Focoos\n", "import os\n", "from pprint import pprint\n", + "\n", "from supervision import plot_image\n", "\n", + "from focoos import Focoos\n", "from focoos.ports import RuntimeTypes\n", "\n", "focoos = Focoos(\n", @@ -196,11 +199,12 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import Focoos, DEV_API_URL\n", + "from pprint import pprint\n", + "\n", "from dotenv import load_dotenv\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "load_dotenv()\n", "\n", @@ -223,19 +227,18 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import DeploymentMode, Focoos, DEV_API_URL\n", + "from pprint import pprint\n", + "\n", "from dotenv import load_dotenv\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "load_dotenv()\n", "\n", "focoos = 
Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.new_model(\n", - " name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\"\n", - ")\n", + "model = focoos.new_model(name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", "### Get Model Info" ] }, @@ -252,11 +255,12 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import Focoos, DEV_API_URL\n", + "from pprint import pprint\n", + "\n", "from dotenv import load_dotenv\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "load_dotenv()\n", "\n", @@ -279,13 +283,13 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import Focoos\n", - "from dotenv import load_dotenv\n", - "import cv2\n", - "import numpy as np\n", + "from pprint import pprint\n", + "\n", "import supervision as sv\n", + "from dotenv import load_dotenv\n", + "\n", + "from focoos import Focoos\n", "\n", "load_dotenv()\n", "\n", @@ -314,11 +318,12 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos import Focoos, DEV_API_URL\n", "import os\n", "from pprint import pprint\n", + "\n", "from supervision import plot_image\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "focoos = Focoos(\n", " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", @@ -358,11 +363,12 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import os\n", - "from focoos import Focoos, DEV_API_URL\n", + "from pprint import pprint\n", + "\n", "from dotenv import load_dotenv\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", "load_dotenv()\n", "\n", @@ -371,9 +377,7 @@ "models = focoos.list_models()\n", "pprint(models)\n", "\n", - "model = focoos.new_model(\n", - " name=\"test-model-2\", focoos_model=\"focoos_object365\", description=\"Test model\"\n", - ")\n", + "model = focoos.new_model(name=\"test-model-2\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", "\n", "\n", "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", @@ -415,10 +419,11 @@ "metadata": {}, "outputs": [], "source": [ - "completed_status = [\"Completed\", \"Failed\"]\n", "import time\n", + "\n", "from focoos.utils.logger import get_logger\n", "\n", + "completed_status = [\"Completed\", \"Failed\"]\n", "logger = get_logger(__name__)\n", "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", "status = model.train_status()\n", @@ -452,10 +457,11 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos import Focoos, DEV_API_URL\n", "import os\n", "from pprint import pprint\n", "\n", + "from focoos import DEV_API_URL, Focoos\n", + "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", "user_info = focoos.get_user_info()\n", diff --git a/pyproject.toml b/pyproject.toml index 14a2b2b..c0351e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,24 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" + + +[tool.ruff] +line-length = 120 +lint.select = ["E", "F"] +lint.extend-select = ["I"] +lint.ignore = ["E501"] +fix = true + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +docstring-code-format = true +docstring-code-line-length = 160 + +[tool.ruff.lint.pydocstyle] +convention = "google" + [tool.setuptools.packages.find] include = ["focoos**"] @@ -14,7 +32,8 @@ requires-python = ">=3.10" dependencies = [ "requests", "Pillow~=10.4.0", - 
"supervision~=0.26.0rc2", + "supervision~=0.25.1", + "opencv-python~=4.11.0", "pydantic~=2.10.5", "pydantic-settings~=2.7.1", "tqdm~=4.67.1", @@ -41,9 +60,7 @@ dev = [ "pytest", "pytest-cov", "pytest-mock", - "isort", - "black", - "flake8", + "ruff", "python-dotenv", "gradio~=5.10.0", "pre-commit~=4.0.1", From 070b365887d44a6b760dbb5079e0cca6ff4a4c5f Mon Sep 17 00:00:00 2001 From: Ivan Murabito Date: Wed, 22 Jan 2025 10:44:40 +0000 Subject: [PATCH 2/4] style: lint and format --- Makefile | 5 +- focoos/focoos.py | 28 +++------- focoos/local_model.py | 20 +++----- focoos/ports.py | 42 ++++++--------- focoos/remote_model.py | 64 ++++++----------------- focoos/runtime.py | 40 ++++----------- focoos/utils/logger.py | 2 +- focoos/utils/system.py | 8 +-- focoos/utils/vision.py | 27 +++------- notebooks/.dataset.ipynb | 2 +- notebooks/.monitor.ipynb | 4 +- notebooks/concrete.ipynb | 13 ++--- notebooks/utils.py | 14 ++--- tests/helpers.py | 8 +-- tests/test_focoos.py | 101 +++++++++---------------------------- tests/test_local_model.py | 28 +++------- tests/test_ports.py | 5 +- tests/test_remote_model.py | 68 +++++++------------------ tests/test_runtime.py | 8 +-- tests/test_system.py | 8 +-- tests/utils/test_vision.py | 28 +++------- 21 files changed, 140 insertions(+), 383 deletions(-) diff --git a/Makefile b/Makefile index f0b8550..eeb3612 100644 --- a/Makefile +++ b/Makefile @@ -18,9 +18,8 @@ install-gpu: .uv .pre-commit @pre-commit install lint: - @ruff check ./corpus ./tests ./ops ./ray_apps --fix - @ruff format ./corpus ./tests ./ops ./ray_apps - + @ruff check ./focoos ./tests ./notebooks --fix + @ruff format ./focoos ./tests ./notebooks run-pre-commit: .pre-commit @pre-commit run --all-files diff --git a/focoos/focoos.py b/focoos/focoos.py index 4d91860..1bda7b0 100644 --- a/focoos/focoos.py +++ b/focoos/focoos.py @@ -23,8 +23,8 @@ from focoos.ports import ( DatasetMetadata, ModelMetadata, + ModelNotFound, ModelPreview, - Quotas, RuntimeTypes, User, ) @@ -92,9 +92,7 @@ def __init__( self.http_client = HttpClient(self.api_key, host_url) self.user_info = self.get_user_info() self.cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "focoos") - logger.info( - f"Currently logged as: {self.user_info.email} environment: {host_url}" - ) + logger.info(f"Currently logged as: {self.user_info.email} environment: {host_url}") def get_user_info(self) -> User: """ @@ -160,9 +158,7 @@ def list_focoos_models(self) -> list[ModelPreview]: res = self.http_client.get("models/focoos-models") if res.status_code != 200: logger.error(f"Failed to list focoos models: {res.status_code} {res.text}") - raise ValueError( - f"Failed to list focoos models: {res.status_code} {res.text}" - ) + raise ValueError(f"Failed to list focoos models: {res.status_code} {res.text}") return [ModelPreview.from_json(r) for r in res.json()] def get_local_model( @@ -207,9 +203,7 @@ def get_remote_model(self, model_ref: str) -> RemoteModel: """ return RemoteModel(model_ref, self.http_client) - def new_model( - self, name: str, focoos_model: str, description: str - ) -> Optional[RemoteModel]: + def new_model(self, name: str, focoos_model: str, description: str) -> RemoteModel: """ Creates a new model in the Focoos system. 
@@ -238,7 +232,6 @@ def new_model( logger.warning(f"Model already exists: {name}") return self.get_model_by_name(name, remote=True) logger.warning(f"Failed to create new model: {res.status_code} {res.text}") - return None def list_shared_datasets(self) -> list[DatasetMetadata]: """ @@ -291,12 +284,8 @@ def _download_model(self, model_ref: str) -> str: logger.info("đŸ“¥ Downloading model from Focoos Cloud.. ") response = self.http_client.get_external_url(download_uri, stream=True) if response.status_code != 200: - logger.error( - f"Failed to download model: {response.status_code} {response.text}" - ) - raise ValueError( - f"Failed to download model: {response.status_code} {response.text}" - ) + logger.error(f"Failed to download model: {response.status_code} {response.text}") + raise ValueError(f"Failed to download model: {response.status_code} {response.text}") total_size = int(response.headers.get("content-length", 0)) logger.info(f"đŸ“¥ Size: {total_size / (1024**2):.2f} MB") @@ -339,9 +328,7 @@ def get_dataset_by_name(self, name: str) -> Optional[DatasetMetadata]: if name_lower == dataset.name.lower(): return dataset - def get_model_by_name( - self, name: str, remote: bool = True - ) -> Optional[Union[RemoteModel, LocalModel]]: + def get_model_by_name(self, name: str, remote: bool = True) -> Union[RemoteModel, LocalModel]: """ Retrieves a model by its name. @@ -360,3 +347,4 @@ def get_model_by_name( return self.get_remote_model(model.ref) else: return self.get_local_model(model.ref) + raise ModelNotFound(f"Model not found: {name}") diff --git a/focoos/local_model.py b/focoos/local_model.py index 5571244..0a37fbe 100644 --- a/focoos/local_model.py +++ b/focoos/local_model.py @@ -126,24 +126,18 @@ def _annotate(self, im: np.ndarray, detections: sv.Detections) -> np.ndarray: """ classes = self.metadata.classes labels = [ - f"{classes[int(class_id)] if classes is not None else str(class_id)}: {confid*100:.0f}%" + f"{classes[int(class_id)] if classes is not None else str(class_id)}: {confid * 100:.0f}%" for class_id, confid in zip(detections.class_id, detections.confidence) # type: ignore ] if self.metadata.task == FocoosTask.DETECTION: - annotated_im = self.box_annotator.annotate( - scene=im.copy(), detections=detections - ) + annotated_im = self.box_annotator.annotate(scene=im.copy(), detections=detections) - annotated_im = self.label_annotator.annotate( - scene=annotated_im, detections=detections, labels=labels - ) + annotated_im = self.label_annotator.annotate(scene=annotated_im, detections=detections, labels=labels) elif self.metadata.task in [ FocoosTask.SEMSEG, FocoosTask.INSTANCE_SEGMENTATION, ]: - annotated_im = self.mask_annotator.annotate( - scene=im.copy(), detections=detections - ) + annotated_im = self.mask_annotator.annotate(scene=im.copy(), detections=detections) return annotated_im def infer( @@ -185,10 +179,8 @@ def infer( detections = self.runtime(im1.astype(np.float32), threshold) t2 = perf_counter() if resize: - detections = scale_detections( - detections, (resize, resize), (im0.shape[1], im0.shape[0]) - ) - logger.debug(f"Inference time: {t2-t1:.3f} seconds") + detections = scale_detections(detections, (resize, resize), (im0.shape[1], im0.shape[0])) + logger.debug(f"Inference time: {t2 - t1:.3f} seconds") im = None if annotate: im = self._annotate(im0, detections) diff --git a/focoos/ports.py b/focoos/ports.py index 32d2e32..6c7bf28 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -7,9 +7,7 @@ from pydantic import BaseModel, Field, field_validator -S3_URL_REGEX = 
re.compile( - r"^s3://" r"(?P[a-zA-Z0-9.-]+)/" r"(?P.+(\.tar\.gz|\.zip)?)$" -) +S3_URL_REGEX = re.compile(r"^s3://" r"(?P[a-zA-Z0-9.-]+)/" r"(?P.+(\.tar\.gz|\.zip)?)$") DEV_API_URL = "https://api.dev.focoos.ai/v0" PROD_API_URL = "https://api.focoos.ai/v0" @@ -70,13 +68,9 @@ class Hyperparameters(FocoosBaseModel): ] = 500 max_iters: Annotated[ int, - Field( - 1500, ge=100, le=100000, description="Maximum number of training iterations" - ), + Field(1500, ge=100, le=100000, description="Maximum number of training iterations"), ] = 1500 - resolution: Annotated[ - int, Field(640, description="Model expected resolution", ge=128, le=6400) - ] = 640 + resolution: Annotated[int, Field(640, description="Model expected resolution", ge=128, le=6400)] = 640 wandb_project: Annotated[ Optional[str], Field(description="Wandb project name must be like ORG_ID/PROJECT_NAME"), @@ -88,21 +82,13 @@ class Hyperparameters(FocoosBaseModel): ] = 5e-4 decoder_multiplier: Annotated[float, Field(description="Backbone multiplier")] = 1 backbone_multiplier: float = 0.1 - amp_enabled: Annotated[ - bool, Field(description="Enable automatic mixed precision") - ] = True + amp_enabled: Annotated[bool, Field(description="Enable automatic mixed precision")] = True weight_decay: Annotated[float, Field(description="Weight decay")] = 0.02 - ema_enabled: Annotated[ - bool, Field(description="Enable EMA (exponential moving average)") - ] = False + ema_enabled: Annotated[bool, Field(description="Enable EMA (exponential moving average)")] = False ema_decay: Annotated[float, Field(description="EMA decay rate")] = 0.999 ema_warmup: Annotated[int, Field(description="EMA warmup")] = 100 - freeze_bn: Annotated[ - bool, Field(description="Freeze batch normalization layers") - ] = False - freeze_bn_bkb: Annotated[ - bool, Field(description="Freeze backbone batch normalization layers") - ] = True + freeze_bn: Annotated[bool, Field(description="Freeze batch normalization layers")] = False + freeze_bn_bkb: Annotated[bool, Field(description="Freeze backbone batch normalization layers")] = True optimizer: Literal["ADAMW", "SGD", "RMSPROP"] = "ADAMW" scheduler: Literal["POLY", "FIXED", "COSINE", "MULTISTEP"] = "MULTISTEP" @@ -119,9 +105,7 @@ def validate_wandb_project(cls, value): if value is not None: # Define a regex pattern to match valid characters if not re.match(r"^[\w.-/]+$", value): - raise ValueError( - "Wandb project name must only contain characters, dashes, underscores, and dots." 
- ) + raise ValueError("Wandb project name must only contain characters, dashes, underscores, and dots.") return value @@ -298,9 +282,7 @@ def pretty_print(self): else: for item in value: print(f" - {item}") - elif ( - isinstance(value, dict) and key == "packages_versions" - ): # Special formatting for packages_versions + elif isinstance(value, dict) and key == "packages_versions": # Special formatting for packages_versions print(f"{key}:") for pkg_name, pkg_version in value.items(): print(f" - {pkg_name}: {pkg_version}") @@ -336,3 +318,9 @@ class User(FocoosBaseModel): company: Optional[str] = None api_key: ApiKey quotas: Quotas + + +class ModelNotFound(Exception): + def __init__(self, message: str): + self.message = message + super().__init__(self.message) diff --git a/focoos/remote_model.py b/focoos/remote_model.py index 4793c76..320974d 100644 --- a/focoos/remote_model.py +++ b/focoos/remote_model.py @@ -164,9 +164,7 @@ def train_status(self) -> dict | None: res = self.http_client.get(f"models/{self.model_ref}/train/status") if res.status_code != 200: logger.error(f"Failed to get train status: {res.status_code} {res.text}") - raise ValueError( - f"Failed to get train status: {res.status_code} {res.text}" - ) + raise ValueError(f"Failed to get train status: {res.status_code} {res.text}") return res.json() def train_logs(self) -> list[str]: @@ -207,29 +205,23 @@ def _annotate(self, im: np.ndarray, detections: sv.Detections) -> np.ndarray: classes = self.metadata.classes if classes is not None: labels = [ - f"{classes[int(class_id)]}: {confid*100:.0f}%" + f"{classes[int(class_id)]}: {confid * 100:.0f}%" for class_id, confid in zip(detections.class_id, detections.confidence) ] else: labels = [ - f"{str(class_id)}: {confid*100:.0f}%" + f"{str(class_id)}: {confid * 100:.0f}%" for class_id, confid in zip(detections.class_id, detections.confidence) ] if self.metadata.task == FocoosTask.DETECTION: - annotated_im = self.box_annotator.annotate( - scene=im.copy(), detections=detections - ) + annotated_im = self.box_annotator.annotate(scene=im.copy(), detections=detections) - annotated_im = self.label_annotator.annotate( - scene=annotated_im, detections=detections, labels=labels - ) + annotated_im = self.label_annotator.annotate(scene=annotated_im, detections=detections, labels=labels) elif self.metadata.task in [ FocoosTask.SEMSEG, FocoosTask.INSTANCE_SEGMENTATION, ]: - annotated_im = self.mask_annotator.annotate( - scene=im.copy(), detections=detections - ) + annotated_im = self.mask_annotator.annotate(scene=im.copy(), detections=detections) return annotated_im def infer( @@ -277,11 +269,9 @@ def infer( ) t1 = time.time() if res.status_code == 200: - logger.debug(f"Inference time: {t1-t0:.3f} seconds") + logger.debug(f"Inference time: {t1 - t0:.3f} seconds") detections = FocoosDetections( - detections=[ - FocoosDet.from_json(d) for d in res.json().get("detections", []) - ], + detections=[FocoosDet.from_json(d) for d in res.json().get("detections", [])], latency=res.json().get("latency", None), ) preview = None @@ -338,33 +328,15 @@ def _log_metrics(self): """ metrics = self.train_metrics() if metrics: - iter = ( - metrics["iter"][-1] - if "iter" in metrics and len(metrics["iter"]) > 0 - else -1 - ) - total_loss = ( - metrics["total_loss"][-1] - if "total_loss" in metrics and len(metrics["total_loss"]) > 0 - else -1 - ) + iter = metrics["iter"][-1] if "iter" in metrics and len(metrics["iter"]) > 0 else -1 + total_loss = metrics["total_loss"][-1] if "total_loss" in metrics and 
len(metrics["total_loss"]) > 0 else -1 if self.metadata.task == FocoosTask.SEMSEG: - accuracy = ( - metrics["mIoU"][-1] - if "mIoU" in metrics and len(metrics["mIoU"]) > 0 - else "-" - ) + accuracy = metrics["mIoU"][-1] if "mIoU" in metrics and len(metrics["mIoU"]) > 0 else "-" eval_metric = "mIoU" else: - accuracy = ( - metrics["AP50"][-1] - if "AP50" in metrics and len(metrics["AP50"]) > 0 - else "-" - ) + accuracy = metrics["AP50"][-1] if "AP50" in metrics and len(metrics["AP50"]) > 0 else "-" eval_metric = "AP50" - logger.info( - f"Iter {iter:.0f}: Loss {total_loss:.2f}, {eval_metric} {accuracy}" - ) + logger.info(f"Iter {iter:.0f}: Loss {total_loss:.2f}, {eval_metric} {accuracy}") def monitor_train(self, update_period=30) -> None: """ @@ -405,12 +377,10 @@ def monitor_train(self, update_period=30) -> None: if status["main_status"] in ["InProgress"]: if prev_status["secondary_status"] != status["secondary_status"]: if status["secondary_status"] in ["Starting", "Pending"]: - logger.info( - f"[0s] {status['main_status']}: {status['secondary_status']}" - ) + logger.info(f"[0s] {status['main_status']}: {status['secondary_status']}") else: logger.info( - f"[{elapsed//60}m:{elapsed%60}s] {status['main_status']}: {status['secondary_status']}" + f"[{elapsed // 60}m:{elapsed % 60}s] {status['main_status']}: {status['secondary_status']}" ) if status["secondary_status"] in ["Training"]: self._log_metrics() @@ -442,9 +412,7 @@ def stop_training(self) -> None: res = self.http_client.delete(f"models/{self.model_ref}/train") if res.status_code != 200: logger.error(f"Failed to get stop training: {res.status_code} {res.text}") - raise ValueError( - f"Failed to get stop training: {res.status_code} {res.text}" - ) + raise ValueError(f"Failed to get stop training: {res.status_code} {res.text}") def delete_model(self) -> None: """ diff --git a/focoos/runtime.py b/focoos/runtime.py index 0179e8e..87e8182 100644 --- a/focoos/runtime.py +++ b/focoos/runtime.py @@ -36,9 +36,7 @@ GPU_ID = 0 -def det_postprocess( - out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float -) -> sv.Detections: +def det_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float) -> sv.Detections: """ Postprocesses the output of an object detection model and filters detections based on a confidence threshold. @@ -63,9 +61,7 @@ def det_postprocess( ) -def semseg_postprocess( - out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float -) -> sv.Detections: +def semseg_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float) -> sv.Detections: """ Postprocesses the output of a semantic segmentation model and filters based on a confidence threshold. @@ -110,9 +106,7 @@ class ONNXRuntime: binding (Optional[str]): The binding type for the runtime (e.g., CUDA, CPU). """ - def __init__( - self, model_path: str, opts: OnnxEngineOpts, model_metadata: ModelMetadata - ): + def __init__(self, model_path: str, opts: OnnxEngineOpts, model_metadata: ModelMetadata): """ Initializes the ONNXRuntime instance with the specified model and configuration options. 
@@ -123,17 +117,11 @@ def __init__( """ self.logger = get_logger() self.logger.debug(f"[onnxruntime device] {ort.get_device()}") - self.logger.debug( - f"[onnxruntime available providers] {ort.get_available_providers()}" - ) + self.logger.debug(f"[onnxruntime available providers] {ort.get_available_providers()}") self.name = Path(model_path).stem self.opts = opts self.model_metadata = model_metadata - self.postprocess_fn = ( - det_postprocess - if model_metadata.task == FocoosTask.DETECTION - else semseg_postprocess - ) + self.postprocess_fn = det_postprocess if model_metadata.task == FocoosTask.DETECTION else semseg_postprocess options = ort.SessionOptions() if opts.verbose: options.log_severity_level = 0 @@ -177,9 +165,7 @@ def __init__( # 'use_compiled_network': False} ) ) - options.graph_optimization_level = ( - ort.GraphOptimizationLevel.ORT_DISABLE_ALL - ) + options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL dtype = np.float32 binding = None elif opts.cuda and "CUDAExecutionProvider" in available_providers: @@ -209,9 +195,7 @@ def __init__( self.binding = binding self.ort_sess = ort.InferenceSession(model_path, options, providers=providers) self.active_providers = self.ort_sess.get_providers() - self.logger.info( - f"[onnxruntime] Active providers:{self.ort_sess.get_providers()}" - ) + self.logger.info(f"[onnxruntime] Active providers:{self.ort_sess.get_providers()}") if self.ort_sess.get_inputs()[0].type == "tensor(uint8)": self.dtype = np.uint8 else: @@ -222,7 +206,6 @@ def __init__( np_image = np.random.rand(1, 3, 640, 640).astype(self.dtype) input_name = self.ort_sess.get_inputs()[0].name out_name = [output.name for output in self.ort_sess.get_outputs()] - t0 = perf_counter() if self.binding is not None: io_binding = self.ort_sess.io_binding() io_binding.bind_input( @@ -235,9 +218,7 @@ def __init__( ) io_binding.bind_cpu_input(input_name, np_image) io_binding.bind_output(out_name[0], self.binding) - t0 = perf_counter() self.ort_sess.run_with_iobinding(io_binding) - t1 = perf_counter() io_binding.copy_outputs_to_cpu() else: self.ort_sess.run(out_name, {input_name: np_image}) @@ -278,9 +259,7 @@ def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: else: out = self.ort_sess.run(out_name, {input_name: im}) - detections = self.postprocess_fn( - out, (im.shape[2], im.shape[3]), conf_threshold - ) + detections = self.postprocess_fn(out, (im.shape[2], im.shape[3]), conf_threshold) return detections def benchmark(self, iterations=20, size=640) -> LatencyMetrics: @@ -323,10 +302,9 @@ def benchmark(self, iterations=20, size=640) -> LatencyMetrics: start = perf_counter() self.ort_sess.run_with_iobinding(io_binding) end = perf_counter() - # out = io_binding.copy_outputs_to_cpu() else: start = perf_counter() - out = self.ort_sess.run(out_name, {input_name: np_input}) + self.ort_sess.run(out_name, {input_name: np_input}) end = perf_counter() if step >= 5: diff --git a/focoos/utils/logger.py b/focoos/utils/logger.py index 47bd0ec..8f9f39f 100644 --- a/focoos/utils/logger.py +++ b/focoos/utils/logger.py @@ -12,7 +12,7 @@ class ColoredFormatter(logging.Formatter): green = "\x1b[1;32m" yellow = "\x1b[1;33m" red = "\x1b[31;20m" - blue = "\x1B[1;34m" + blue = "\x1b[1;34m" light_blue = "\x1b[1;36m" purple = "\x1b[1;35m" bold_red = "\x1b[31;1m" diff --git a/focoos/utils/system.py b/focoos/utils/system.py index 19d4939..6c35a64 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -48,9 +48,7 @@ def __init__( "user_agent": "focoos/0.0.1", } 
- def get_external_url( - self, path: str, params: Optional[dict] = None, stream: bool = False - ): + def get_external_url(self, path: str, params: Optional[dict] = None, stream: bool = False): """ Perform a GET request to an external URL. @@ -147,9 +145,7 @@ def get_cuda_version() -> Optional[str]: Optional[str]: The CUDA version if available, otherwise None. """ try: - result = subprocess.run( - ["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) + result = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) if result.returncode == 0: output = result.stdout diff --git a/focoos/utils/vision.py b/focoos/utils/vision.py index 0980524..c7503bc 100644 --- a/focoos/utils/vision.py +++ b/focoos/utils/vision.py @@ -84,9 +84,7 @@ def image_preprocess( if resize: _im1 = cv2.resize(im0, (resize, resize)) - im1 = np.ascontiguousarray(_im1.transpose(2, 0, 1)[np.newaxis, :]).astype( - dtype - ) # HWC->1CHW + im1 = np.ascontiguousarray(_im1.transpose(2, 0, 1)[np.newaxis, :]).astype(dtype) # HWC->1CHW return im1, im0 @@ -116,17 +114,13 @@ def scale_mask(mask: np.ndarray, target_shape: tuple) -> np.ndarray: return scaled_mask.astype(bool) -def scale_detections( - detections: sv.Detections, in_shape: tuple, out_shape: tuple -) -> sv.Detections: +def scale_detections(detections: sv.Detections, in_shape: tuple, out_shape: tuple) -> sv.Detections: if in_shape[0] == out_shape[0] and in_shape[1] == out_shape[1]: return detections if detections.xyxy is not None: x_ratio = out_shape[0] / in_shape[0] y_ratio = out_shape[1] / in_shape[1] - detections.xyxy = detections.xyxy * np.array( - [x_ratio, y_ratio, x_ratio, y_ratio] - ) + detections.xyxy = detections.xyxy * np.array([x_ratio, y_ratio, x_ratio, y_ratio]) return detections @@ -137,12 +131,7 @@ def base64mask_to_mask(base64mask: str) -> np.ndarray: def focoos_detections_to_supervision( inference_output: FocoosDetections, ) -> sv.Detections: - xyxy = np.array( - [ - d.bbox if d.bbox is not None else np.empty(4) - for d in inference_output.detections - ] - ) + xyxy = np.array([d.bbox if d.bbox is not None else np.empty(4) for d in inference_output.detections]) class_id = np.array([d.cls_id for d in inference_output.detections]) confidence = np.array([d.conf for d in inference_output.detections]) if xyxy.shape[0] == 0: @@ -191,9 +180,7 @@ def binary_mask_to_base64(binary_mask: np.ndarray) -> str: return encoded_png -def sv_to_focoos_detections( - detections: sv.Detections, classes: Optional[list[str]] = None -) -> FocoosDetections: +def sv_to_focoos_detections(detections: sv.Detections, classes: Optional[list[str]] = None) -> FocoosDetections: """ Convert a list of detections from the supervision format to Focoos detection format. 
@@ -227,9 +214,7 @@ def sv_to_focoos_detections( bbox=[round(float(x), 2) for x in xyxy], mask=binary_mask_to_base64(mask) if mask is not None else None, conf=round(float(conf), 2) if conf is not None else None, - label=( - classes[cls_id] if classes is not None and cls_id is not None else None - ), + label=(classes[cls_id] if classes is not None and cls_id is not None else None), ) res.append(det) return FocoosDetections(detections=res) diff --git a/notebooks/.dataset.ipynb b/notebooks/.dataset.ipynb index c8dffac..3b1b585 100644 --- a/notebooks/.dataset.ipynb +++ b/notebooks/.dataset.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "from utils import display_instseg, display_detection\n", + "from utils import display_detection\n", "\n", "PATH = \"../data/concrete2/train\"\n", "\n", diff --git a/notebooks/.monitor.ipynb b/notebooks/.monitor.ipynb index 57acd64..6ccbb2a 100644 --- a/notebooks/.monitor.ipynb +++ b/notebooks/.monitor.ipynb @@ -6,10 +6,10 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos import Focoos\n", - "from pprint import pprint\n", "import os\n", "\n", + "from focoos import Focoos\n", + "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "\n", "models = focoos.list_models()\n", diff --git a/notebooks/concrete.ipynb b/notebooks/concrete.ipynb index 103e95d..7c626ae 100644 --- a/notebooks/concrete.ipynb +++ b/notebooks/concrete.ipynb @@ -13,9 +13,10 @@ "metadata": {}, "outputs": [], "source": [ + "from utils import display_instseg\n", + "\n", "PATH = \"../data/concrete/valid\"\n", "\n", - "from utils import display_instseg\n", "\n", "display_instseg(PATH, annotate=True)" ] @@ -33,9 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from pprint import pprint\n", - "from focoos import DeploymentMode, Focoos" + "from focoos import Focoos" ] }, { @@ -99,15 +98,13 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos.ports import Hyperparameters, TrainInstance\n", + "from focoos.ports import Hyperparameters\n", "\n", "res = model.train(\n", " dataset_ref=dataset.ref, # dataset reference -> Aeroscapes\n", " max_runtime_in_seconds=36000,\n", " # instance_type=TrainInstance.ML_G5_XLARGE,\n", - " hyperparameters=Hyperparameters(\n", - " learning_rate=0.0005, batch_size=32, max_iters=2000\n", - " ),\n", + " hyperparameters=Hyperparameters(learning_rate=0.0005, batch_size=32, max_iters=2000),\n", ")" ] }, diff --git a/notebooks/utils.py b/notebooks/utils.py index 2e25a9f..4e79a54 100644 --- a/notebooks/utils.py +++ b/notebooks/utils.py @@ -85,9 +85,7 @@ def display_instseg(path, num_images=9, annotate=True): if ann["image_id"] == img_info["id"]: category_id = ann["category_id"] if category_id not in masks: - masks[category_id] = np.zeros( - (img_info["height"], img_info["width"]) - ) + masks[category_id] = np.zeros((img_info["height"], img_info["width"])) for seg in ann["segmentation"]: poly = np.array(seg).reshape(-1, 2).astype(np.int32) cv2.fillPoly(masks[category_id], [poly], 1) @@ -115,9 +113,7 @@ def run_inference(image, conf=0.5): # Load and resize the image resized, _ = image_preprocess(image, resize=640) # Using standard 640 size # Save to temporary file - tmp_path = ( - f"/Users/fcdl94/Develop/focoos/data/{os.path.basename(image)}_resized.jpg" - ) + tmp_path = f"/Users/fcdl94/Develop/focoos/data/{os.path.basename(image)}_resized.jpg" # resized is in CHW format, need to convert to HWC and uint8 for saving img_to_save = resized[0].transpose(1, 2, 0).astype(np.uint8) cv2.imwrite(tmp_path, img_to_save) @@ 
-135,13 +131,11 @@ def run_inference(image, conf=0.5): with gr.Column(): output_image = gr.Image(type="pil") output_detections = gr.JSON() - examples = gr.Examples( + gr.Examples( fn=run_inference, inputs=[image], outputs=[output_image], - examples=[ - paths[i] for i in random.sample(range(len(paths)), min(5, len(paths))) - ], + examples=[paths[i] for i in random.sample(range(len(paths)), min(5, len(paths)))], ) start_btn.click( fn=run_inference, diff --git a/tests/helpers.py b/tests/helpers.py index ba313cb..209641b 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -19,10 +19,6 @@ def not_raises(expected_exception: ExceptionType): try: yield except expected_exception as err: - raise AssertionError( - f"Expected no exception of type {repr(expected_exception)} to be raised, but got: {err}" - ) + raise AssertionError(f"Expected no exception of type {repr(expected_exception)} to be raised, but got: {err}") except Exception as err: - raise AssertionError( - f"An unexpected exception of type {type(err).__name__} was raised: {err}" - ) + raise AssertionError(f"An unexpected exception of type {type(err).__name__} was raised: {err}") diff --git a/tests/test_focoos.py b/tests/test_focoos.py index 3074724..355f074 100644 --- a/tests/test_focoos.py +++ b/tests/test_focoos.py @@ -98,9 +98,7 @@ def mock_local_model(): def test_focoos_initialization_no_api_key(focoos_instance: Focoos): focoos_instance.http_client.get = MagicMock( - return_value=MagicMock( - status_code=200, json=lambda: {"email": "test@example.com"} - ) + return_value=MagicMock(status_code=200, json=lambda: {"email": "test@example.com"}) ) FOCOOS_CONFIG.focoos_api_key = "" with pytest.raises(ValueError): @@ -141,9 +139,7 @@ def test_get_model_info(focoos_instance: Focoos): "task": "detection", "status": "TRAINING_COMPLETED", } - focoos_instance.http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=lambda: mock_response) - ) + focoos_instance.http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_response)) model_info = focoos_instance.get_model_info("test-model") assert model_info.name == "test-model" assert model_info.ref == "model-ref" @@ -158,9 +154,7 @@ def test_get_model_info_fail(focoos_instance: Focoos): def test_list_models(focoos_instance: Focoos, mock_list_models): - focoos_instance.http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=lambda: mock_list_models) - ) + focoos_instance.http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_list_models)) models = focoos_instance.list_models() assert len(models) == 2 @@ -195,9 +189,7 @@ def test_list_focoos_models(focoos_instance: Focoos): }, ] - focoos_instance.http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=lambda: mock_response) - ) + focoos_instance.http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_response)) models = focoos_instance.list_focoos_models() assert len(models) == 2 @@ -252,9 +244,7 @@ def test_get_model_by_name_remote( mock_remote_model, model_name, ): - focoos_instance.list_models = MagicMock( - return_value=mock_list_models_as_base_models - ) + focoos_instance.list_models = MagicMock(return_value=mock_list_models_as_base_models) focoos_instance.get_remote_model = MagicMock(return_value=mock_remote_model) model = focoos_instance.get_model_by_name(name=model_name, remote=True) @@ -270,9 +260,7 @@ def test_get_model_by_name_local( mock_local_model, model_name, ): - focoos_instance.list_models 
= MagicMock( - return_value=mock_list_models_as_base_models - ) + focoos_instance.list_models = MagicMock(return_value=mock_list_models_as_base_models) focoos_instance.get_local_model = MagicMock(return_value=mock_local_model) model = focoos_instance.get_model_by_name(name=model_name, remote=False) assert model is not None @@ -281,16 +269,12 @@ def test_get_model_by_name_local( def test_get_model_by_name_model_not_found(focoos_instance: Focoos, mock_list_models): - focoos_instance.http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=lambda: mock_list_models) - ) + focoos_instance.http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_list_models)) model = focoos_instance.get_model_by_name(name="model3") assert model is None -def test_get_remote_model( - mocker: MockerFixture, focoos_instance: Focoos, mock_remote_model, mock_http_client -): +def test_get_remote_model(mocker: MockerFixture, focoos_instance: Focoos, mock_remote_model, mock_http_client): mock_remote_model_class = mocker.patch("focoos.focoos.RemoteModel", autospec=True) mock_remote_model_class.return_value = mock_remote_model model_ref = "ref1" @@ -301,9 +285,7 @@ def test_get_remote_model( assert isinstance(model, RemoteModel) -def test_get_local_model( - mocker: MockerFixture, focoos_instance: Focoos, mock_local_model -): +def test_get_local_model(mocker: MockerFixture, focoos_instance: Focoos, mock_local_model): # Mock the LocalModel class mock_local_model_class = mocker.patch("focoos.focoos.LocalModel", autospec=True) mock_local_model_class.return_value = mock_local_model @@ -324,26 +306,20 @@ def test_get_local_model( # Assertions assert model is not None assert model.model_ref == model_ref - mock_local_model_class.assert_called_once_with( - model_path.parent.as_posix(), FOCOOS_CONFIG.runtime_type - ) + mock_local_model_class.assert_called_once_with(model_path.parent.as_posix(), FOCOOS_CONFIG.runtime_type) assert isinstance(model, LocalModel) # Assert _download_model was not called download_model_spy.assert_not_called() -def test_get_local_model_with_download( - mocker: MockerFixture, focoos_instance: Focoos, mock_local_model -): +def test_get_local_model_with_download(mocker: MockerFixture, focoos_instance: Focoos, mock_local_model): # Mock the LocalModel class mock_local_model_class = mocker.patch("focoos.focoos.LocalModel", autospec=True) mock_local_model_class.return_value = mock_local_model # Spy on the _download_model method - mock_download_model = mocker.patch.object( - focoos_instance, "_download_model", autospec=True - ) + mock_download_model = mocker.patch.object(focoos_instance, "_download_model", autospec=True) with tempfile.TemporaryDirectory() as temp_dir: focoos_instance.cache_dir = temp_dir @@ -359,9 +335,7 @@ def test_get_local_model_with_download( # Assertions assert model is not None assert model.model_ref == model_ref - mock_local_model_class.assert_called_once_with( - model_path.parent.as_posix(), FOCOOS_CONFIG.runtime_type - ) + mock_local_model_class.assert_called_once_with(model_path.parent.as_posix(), FOCOOS_CONFIG.runtime_type) assert isinstance(model, LocalModel) # Assert _download_model was not called @@ -388,22 +362,14 @@ def test_new_model_created( model = focoos_instance.new_model("fakename", "fakefocoosmodel", "fakedescription") assert model is not None - mock_remote_model_class.assert_called_once_with( - mock_remote_model.model_ref, mock_http_client - ) + mock_remote_model_class.assert_called_once_with(mock_remote_model.model_ref, 
mock_http_client) assert isinstance(model, RemoteModel) -def test_new_model_already_exists( - mocker: MockerFixture, focoos_instance: Focoos, mock_remote_model: RemoteModel -): +def test_new_model_already_exists(mocker: MockerFixture, focoos_instance: Focoos, mock_remote_model: RemoteModel): model_name = "fakename" - focoos_instance.http_client.post = MagicMock( - return_value=MagicMock(status_code=409) - ) - mock_get_model_by_name = mocker.patch.object( - focoos_instance, "get_model_by_name", autospec=True - ) + focoos_instance.http_client.post = MagicMock(return_value=MagicMock(status_code=409)) + mock_get_model_by_name = mocker.patch.object(focoos_instance, "get_model_by_name", autospec=True) mock_get_model_by_name.return_value = mock_remote_model model = focoos_instance.new_model(model_name, "fakefocoosmodel", "fakedescription") @@ -414,9 +380,7 @@ def test_new_model_already_exists( def test_new_model_fail(focoos_instance: Focoos): model_name = "fakename" - focoos_instance.http_client.post = MagicMock( - return_value=MagicMock(status_code=500) - ) + focoos_instance.http_client.post = MagicMock(return_value=MagicMock(status_code=500)) model = focoos_instance.new_model(model_name, "fakefocoosmodel", "fakedescription") assert model is None @@ -445,9 +409,7 @@ def test_download_model_onnx_fail(focoos_instance: Focoos): assert not (pathlib.Path(focoos_instance.cache_dir) / "model.onnx").exists() -def test_download_model_onnx_ok_but_get_external_fail( - mocker: MockerFixture, focoos_instance: Focoos -): +def test_download_model_onnx_ok_but_get_external_fail(mocker: MockerFixture, focoos_instance: Focoos): model_ref = "ref1" focoos_instance.http_client.get = MagicMock( return_value=MagicMock( @@ -458,15 +420,9 @@ def test_download_model_onnx_ok_but_get_external_fail( }, ), ) - mock_model_metadata = mocker.patch( - "focoos.focoos.ModelMetadata.from_json", autospec=True - ) - mock_model_metadata.return_value = MagicMock( - model_dump_json=lambda: "fake_model_dump" - ) - focoos_instance.http_client.get_external_url = MagicMock( - return_value=MagicMock(status_code=500) - ) + mock_model_metadata = mocker.patch("focoos.focoos.ModelMetadata.from_json", autospec=True) + mock_model_metadata.return_value = MagicMock(model_dump_json=lambda: "fake_model_dump") + focoos_instance.http_client.get_external_url = MagicMock(return_value=MagicMock(status_code=500)) with tempfile.TemporaryDirectory() as model_dir_tmp: focoos_instance.cache_dir = model_dir_tmp with pytest.raises(ValueError): @@ -488,12 +444,8 @@ def test_download_model_onnx(mocker: MockerFixture, focoos_instance: Focoos): }, ), ) - mock_model_metadata = mocker.patch( - "focoos.focoos.ModelMetadata.from_json", autospec=True - ) - mock_model_metadata.return_value = MagicMock( - model_dump_json=lambda: "fake_model_dump" - ) + mock_model_metadata = mocker.patch("focoos.focoos.ModelMetadata.from_json", autospec=True) + mock_model_metadata.return_value = MagicMock(model_dump_json=lambda: "fake_model_dump") focoos_instance.http_client.get_external_url = MagicMock( return_value=MagicMock( status_code=200, @@ -510,7 +462,4 @@ def test_download_model_onnx(mocker: MockerFixture, focoos_instance: Focoos): focoos_instance.cache_dir = model_dir_tmp model_path = focoos_instance._download_model(model_ref) assert model_path is not None - assert ( - model_path - == (pathlib.Path(model_dir_tmp) / model_ref / "model.onnx").as_posix() - ) + assert model_path == (pathlib.Path(model_dir_tmp) / model_ref / "model.onnx").as_posix() diff --git a/tests/test_local_model.py 
b/tests/test_local_model.py index fba984a..141856b 100644 --- a/tests/test_local_model.py +++ b/tests/test_local_model.py @@ -36,21 +36,15 @@ def mock_local_model(mocker: MockerFixture, mock_model_dir, image_ndarray): model = LocalModel(model_dir=mock_model_dir, runtime_type=RuntimeTypes.ONNX_CPU) # Mock BoxAnnotator - mock_box_annotator = mocker.patch( - "focoos.local_model.sv.BoxAnnotator", autospec=True - ) + mock_box_annotator = mocker.patch("focoos.local_model.sv.BoxAnnotator", autospec=True) mock_box_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) # Mock LabelAnnotator - mock_label_annotator = mocker.patch( - "focoos.local_model.sv.LabelAnnotator", autospec=True - ) + mock_label_annotator = mocker.patch("focoos.local_model.sv.LabelAnnotator", autospec=True) mock_label_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) # Mock MaskAnnotator - mock_mask_annotator = mocker.patch( - "focoos.local_model.sv.MaskAnnotator", autospec=True - ) + mock_mask_annotator = mocker.patch("focoos.local_model.sv.MaskAnnotator", autospec=True) mock_mask_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) # Inject mock annotators into the local model @@ -116,9 +110,7 @@ def test_annotate_detection_metadata_classes_none( mock_local_model.mask_annotator.annotate.assert_not_called() -def test_annotate_detection( - image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections -): +def test_annotate_detection(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, np.ndarray) @@ -127,9 +119,7 @@ def test_annotate_detection( mock_local_model.mask_annotator.annotate.assert_not_called() -def test_annotate_semseg( - image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections -): +def test_annotate_semseg(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): mock_local_model.metadata.task = FocoosTask.SEMSEG annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None @@ -157,9 +147,7 @@ def mock_infer_setup( mock_scale_detections.return_value = mock_sv_detections # Mock sv_to_focoos_detections - mock_sv_to_focoos_detections = mocker.patch( - "focoos.local_model.sv_to_focoos_detections" - ) + mock_sv_to_focoos_detections = mocker.patch("focoos.local_model.sv_to_focoos_detections") mock_sv_to_focoos_detections.return_value = mock_focoos_detections # Mock _annotate @@ -174,9 +162,7 @@ class MockRuntime(MagicMock): def __call__(self, *args, **kwargs): return mock_sv_detections - mock_runtime_call = mocker.patch.object( - MockRuntime, "__call__", return_value=mock_sv_detections - ) + mock_runtime_call = mocker.patch.object(MockRuntime, "__call__", return_value=mock_sv_detections) mock_local_model.runtime = MockRuntime(spec=ONNXRuntime) return ( diff --git a/tests/test_ports.py b/tests/test_ports.py index 202287f..0e34953 100644 --- a/tests/test_ports.py +++ b/tests/test_ports.py @@ -27,10 +27,7 @@ def test_validate_wandb_project_invalid(): for value in invalid_values: with pytest.raises(ValidationError) as exc_info: Hyperparameters(wandb_project=value) - assert ( - "Wandb project name must only contain characters, dashes, underscores, and dots." - in str(exc_info.value) - ) + assert "Wandb project name must only contain characters, dashes, underscores, and dots." 
in str(exc_info.value) def test_validate_s3_url_valid(): diff --git a/tests/test_remote_model.py b/tests/test_remote_model.py index b447bb0..7bb96ea 100644 --- a/tests/test_remote_model.py +++ b/tests/test_remote_model.py @@ -9,39 +9,27 @@ from focoos.remote_model import RemoteModel -def _get_mock_remote_model( - mocker: MockerFixture, mock_http_client, image_ndarray, mock_metadata: ModelMetadata -): - mock_http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=lambda: mock_metadata.model_dump()) - ) +def _get_mock_remote_model(mocker: MockerFixture, mock_http_client, image_ndarray, mock_metadata: ModelMetadata): + mock_http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_metadata.model_dump())) model = RemoteModel(model_ref="test_model_ref", http_client=mock_http_client) # Mock BoxAnnotator - mock_box_annotator = mocker.patch( - "focoos.remote_model.sv.BoxAnnotator", autospec=True - ) + mock_box_annotator = mocker.patch("focoos.remote_model.sv.BoxAnnotator", autospec=True) mock_box_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) # Mock LabelAnnotator - mock_label_annotator = mocker.patch( - "focoos.remote_model.sv.LabelAnnotator", autospec=True - ) + mock_label_annotator = mocker.patch("focoos.remote_model.sv.LabelAnnotator", autospec=True) mock_label_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) # Mock MaskAnnotator - mock_mask_annotator = mocker.patch( - "focoos.remote_model.sv.MaskAnnotator", autospec=True - ) + mock_mask_annotator = mocker.patch("focoos.remote_model.sv.MaskAnnotator", autospec=True) mock_mask_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) return model @pytest.fixture -def mock_remote_model( - mocker: MockerFixture, mock_http_client, image_ndarray, mock_metadata: ModelMetadata -): +def mock_remote_model(mocker: MockerFixture, mock_http_client, image_ndarray, mock_metadata: ModelMetadata): return _get_mock_remote_model( mocker=mocker, mock_http_client=mock_http_client, @@ -70,27 +58,21 @@ def test_remote_model_initialization_ok( def test_train_status_fail(mock_remote_model: RemoteModel): with pytest.raises(ValueError): - mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock(status_code=500) - ) + mock_remote_model.http_client.get = MagicMock(return_value=MagicMock(status_code=500)) mock_remote_model.train_status() def test_train_status_ok(mock_remote_model: RemoteModel): with tests.not_raises(Exception): mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock( - status_code=200, json=MagicMock(return_value={"status": "training"}) - ) + return_value=MagicMock(status_code=200, json=MagicMock(return_value={"status": "training"})) ) result = mock_remote_model.train_status() assert result == {"status": "training"} def test_train_logs_fail(mock_remote_model: RemoteModel): - mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock(status_code=500, text="Internal Server Error") - ) + mock_remote_model.http_client.get = MagicMock(return_value=MagicMock(status_code=500, text="Internal Server Error")) result = mock_remote_model.train_logs() assert result == [] @@ -98,9 +80,7 @@ def test_train_logs_fail(mock_remote_model: RemoteModel): def test_train_logs_ok(mock_remote_model: RemoteModel): with tests.not_raises(Exception): mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock( - status_code=200, json=MagicMock(return_value=["log1", "log2"]) - ) + 
return_value=MagicMock(status_code=200, json=MagicMock(return_value=["log1", "log2"])) ) result = mock_remote_model.train_logs() assert result == ["log1", "log2"] @@ -108,40 +88,30 @@ def test_train_logs_ok(mock_remote_model: RemoteModel): def test_stop_training_fail(mock_remote_model: RemoteModel): with pytest.raises(ValueError): - mock_remote_model.http_client.delete = MagicMock( - return_value=MagicMock(status_code=500) - ) + mock_remote_model.http_client.delete = MagicMock(return_value=MagicMock(status_code=500)) mock_remote_model.stop_training() def test_stop_training_ok(mock_remote_model: RemoteModel): with tests.not_raises(Exception): - mock_remote_model.http_client.delete = MagicMock( - return_value=MagicMock(status_code=200) - ) + mock_remote_model.http_client.delete = MagicMock(return_value=MagicMock(status_code=200)) mock_remote_model.stop_training() def test_delete_model_fail(mock_remote_model: RemoteModel): with pytest.raises(ValueError): - mock_remote_model.http_client.delete = MagicMock( - return_value=MagicMock(status_code=500) - ) + mock_remote_model.http_client.delete = MagicMock(return_value=MagicMock(status_code=500)) mock_remote_model.delete_model() def test_delete_model_ok(mock_remote_model: RemoteModel): with tests.not_raises(Exception): - mock_remote_model.http_client.delete = MagicMock( - return_value=MagicMock(status_code=204) - ) + mock_remote_model.http_client.delete = MagicMock(return_value=MagicMock(status_code=204)) mock_remote_model.delete_model() def test_train_metrics_fail(mock_remote_model: RemoteModel): - mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock(status_code=500, text="Internal Server Error") - ) + mock_remote_model.http_client.get = MagicMock(return_value=MagicMock(status_code=500, text="Internal Server Error")) result = mock_remote_model.train_metrics() assert result is None @@ -171,9 +141,7 @@ def test_train_fail( mock_hyperparameters: Hyperparameters, ): with pytest.raises(ValueError): - mock_remote_model.http_client.post = MagicMock( - return_value=MagicMock(status_code=500) - ) + mock_remote_model.http_client.post = MagicMock(return_value=MagicMock(status_code=500)) mock_remote_model.train( dataset_ref="dataset_123", hyperparameters=mock_hyperparameters, @@ -184,9 +152,7 @@ def test_train_fail( ) -def test_train_ok( - mock_remote_model: RemoteModel, mock_hyperparameters: Hyperparameters -): +def test_train_ok(mock_remote_model: RemoteModel, mock_hyperparameters: Hyperparameters): mock_remote_model.http_client.post = MagicMock( return_value=MagicMock( status_code=200, diff --git a/tests/test_runtime.py b/tests/test_runtime.py index 663c356..bf8f1cf 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -19,9 +19,7 @@ def test_det_post_process(): conf_threshold = 0.75 sv_detections = det_postprocess(out, im0_shape, conf_threshold) - np.testing.assert_array_equal( - sv_detections.xyxy, np.array([[48, 128, 144, 256], [240, 384, 336, 512]]) - ) + np.testing.assert_array_equal(sv_detections.xyxy, np.array([[48, 128, 144, 256], [240, 384, 336, 512]])) assert sv_detections.class_id is not None np.testing.assert_array_equal(sv_detections.class_id, np.array([1, 2])) assert sv_detections.confidence is not None @@ -147,9 +145,7 @@ def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_op # mock opts mock_onnxruntime_class = mocker.patch("focoos.runtime.ONNXRuntime", autospec=True) - mock_onnxruntime_class.return_value = MagicMock( - spec=ONNXRuntime, opts=expected_opts - ) + 
mock_onnxruntime_class.return_value = MagicMock(spec=ONNXRuntime, opts=expected_opts) # warmup_iter warmup_iter = 2 diff --git a/tests/test_system.py b/tests/test_system.py index f9ae70e..d1176f6 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -45,9 +45,7 @@ def test_get_gpu_name(): def test_get_cpu_name(): - with patch( - "platform.processor", return_value="Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz" - ): + with patch("platform.processor", return_value="Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz"): assert get_cpu_name() == "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz" @@ -86,9 +84,7 @@ def test_http_client_post(extra_headers): client = HttpClient(api_key="test_key", host_url="http://example.com") with patch("requests.post") as mock_post: mock_post.return_value.status_code = 201 - response = client.post( - "test/path", data={"key": "value"}, extra_headers=extra_headers - ) + response = client.post("test/path", data={"key": "value"}, extra_headers=extra_headers) assert response.status_code == 201 mock_post.assert_called_with( "http://example.com/test/path", diff --git a/tests/utils/test_vision.py b/tests/utils/test_vision.py index 6c35130..c66be9c 100644 --- a/tests/utils/test_vision.py +++ b/tests/utils/test_vision.py @@ -113,9 +113,7 @@ def test_focoos_detections_to_supervision_bbox(focoos_detections_bbox): result = focoos_detections_to_supervision(focoos_detections_bbox) # Verify the result is an instance of Supervision Detections - assert isinstance( - result[0], sv.Detections - ), "Result should be an instance of Supervision Detections" + assert isinstance(result[0], sv.Detections), "Result should be an instance of Supervision Detections" # Verify the number of detections assert len(result.xyxy) == 1, "Expected 1 detection" # Verify the bounding box coordinates @@ -130,9 +128,7 @@ def test_focoos_detections_to_supervision_mask(focoos_detections_mask): result = focoos_detections_to_supervision(focoos_detections_mask) # Verify the result is an instance of Supervision Detections - assert isinstance( - result[0], sv.Detections - ), "Result should be an instance of Supervision Detections" + assert isinstance(result[0], sv.Detections), "Result should be an instance of Supervision Detections" # # Verify the number of detections # FIXME: https://github.com/FocoosAI/focoos/issues/38 # assert len(result.xyxy) == 0, "Expected 0 detection" @@ -145,9 +141,7 @@ def test_focoos_detections_no_detections(focoos_detections_no_detections): result = focoos_detections_to_supervision(focoos_detections_no_detections) # Verify the result is an instance of Supervision Detections - assert isinstance( - result, sv.Detections - ), "Result should be an instance of sv.Detections" + assert isinstance(result, sv.Detections), "Result should be an instance of sv.Detections" # Verify the number of detections assert len(result.xyxy) == 0, "Expected 0 detection" # Verify the mask is None @@ -166,24 +160,16 @@ def test_sv_to_focoos_detections(sv_detections: sv.Detections): result = sv_to_focoos_detections(sv_detections) # Verify the result is an instance of FocoosDetections - assert isinstance( - result, FocoosDetections - ), "Result should be an instance of FocoosDetections" + assert isinstance(result, FocoosDetections), "Result should be an instance of FocoosDetections" assert len(result.detections) == 1, "Expected 1 detection" result_focoos_detection = result.detections[0] # Verify the result is an instance of FocoosDet - assert isinstance( - result_focoos_detection, FocoosDet - ), "Result should be an instance 
of FocoosDet" + assert isinstance(result_focoos_detection, FocoosDet), "Result should be an instance of FocoosDet" assert result_focoos_detection.cls_id == 1, "Expected class ID 1" assert result_focoos_detection.label is None, "Label should be None" - assert ( - result_focoos_detection.conf is not None - ), "Confidence score should not be None" - assert math.isclose( - result_focoos_detection.conf, 0.9 - ), "Expected confidence score 0.9" + assert result_focoos_detection.conf is not None, "Confidence score should not be None" + assert math.isclose(result_focoos_detection.conf, 0.9), "Expected confidence score 0.9" assert result_focoos_detection.bbox == [ 10, 20, From ba45d9c09e96ba7594922b4c7f3be980a9a2462c Mon Sep 17 00:00:00 2001 From: Ivan Murabito Date: Wed, 22 Jan 2025 10:48:48 +0000 Subject: [PATCH 3/4] fix test --- tests/test_focoos.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_focoos.py b/tests/test_focoos.py index 355f074..35cb86e 100644 --- a/tests/test_focoos.py +++ b/tests/test_focoos.py @@ -9,7 +9,7 @@ from focoos import Focoos from focoos.config import FOCOOS_CONFIG from focoos.local_model import LocalModel -from focoos.ports import ModelPreview +from focoos.ports import ModelNotFound, ModelPreview from focoos.remote_model import RemoteModel @@ -270,8 +270,8 @@ def test_get_model_by_name_local( def test_get_model_by_name_model_not_found(focoos_instance: Focoos, mock_list_models): focoos_instance.http_client.get = MagicMock(return_value=MagicMock(status_code=200, json=lambda: mock_list_models)) - model = focoos_instance.get_model_by_name(name="model3") - assert model is None + with pytest.raises(ModelNotFound): + focoos_instance.get_model_by_name(name="model3") def test_get_remote_model(mocker: MockerFixture, focoos_instance: Focoos, mock_remote_model, mock_http_client): From 71642990e9afe1b7a68031032984d47ceb263075 Mon Sep 17 00:00:00 2001 From: Ivan Murabito Date: Fri, 24 Jan 2025 10:57:14 +0000 Subject: [PATCH 4/4] feat: enhance training monitoring and metrics visualization - Added new dependencies: matplotlib, colorama, and ipython for improved visualization and interactivity. - Updated cloud training documentation to reflect changes in training job initiation and monitoring. - Refactored training status retrieval to use a new `train_info` method, replacing the previous `train_status`. - Introduced a `metrics` method in the RemoteModel class to fetch training metrics. - Enhanced the `notebook_monitor_train` method to support real-time monitoring and plotting of training metrics. - Fixed typos in class names and improved code consistency across files. 
--- docs/how_to/cloud_training.md | 30 ++-- focoos/__init__.py | 6 +- focoos/ports.py | 17 ++- focoos/remote_model.py | 186 +++++++++++------------- focoos/utils/metrics.py | 138 ++++++++++++++++++ notebooks/playground.ipynb | 112 ++++++++++----- pyproject.toml | 3 + tests/test_remote_model.py | 263 +++++++++++++++++++++++++++------- tests/utils/test_metrics.py | 103 +++++++++++++ 9 files changed, 646 insertions(+), 212 deletions(-) create mode 100644 focoos/utils/metrics.py create mode 100644 tests/utils/test_metrics.py diff --git a/docs/how_to/cloud_training.md b/docs/how_to/cloud_training.md index fe2a153..26fa384 100644 --- a/docs/how_to/cloud_training.md +++ b/docs/how_to/cloud_training.md @@ -20,7 +20,6 @@ pprint(datasets) ``` ##  Initiating a Cloud Training Job - To start training, configure the model, dataset, and training parameters as shown below: ```python @@ -29,7 +28,6 @@ from focoos.ports import Hyperparameters, TrainInstance model = focoos.get_remote_model("") res = model.train( - anyma_version="0.11.1", dataset_ref="", instance_type=TrainInstance.ML_G4DN_XLARGE, volume_size=50, @@ -42,29 +40,19 @@ res = model.train( resolution=640, ), # type: ignore ) -pprint(res) ``` -##  Monitoring Training Progress +##  Monitoring Training Progress in a Jupyter Notebook Once the training job is initiated, monitor its progress by polling the training status. Use the following code: ```python -import time -from pprint import pprint -from focoos.utils.logger import get_logger +from focoos import Focoos -completed_status = ["Completed", "Failed"] -logger = get_logger(__name__) +focoos = Focoos(api_key=os.getenv("FOCOOS_API_KEY")) model = focoos.get_remote_model("") -status = model.train_status() - -while status["main_status"] not in completed_status: - status = model.train_status() - logger.info(f"Training status: {status['main_status']}") - pprint(f"Training progress: {status['status_transitions']}") - time.sleep(30) +model.notebook_monitor_train(interval=30, plot_metrics=True) ``` ##  Retrieving Training Logs @@ -75,3 +63,13 @@ After the training process is complete, retrieve the logs for detailed insights: ```python logs = model.train_logs() pprint(logs) ``` + +## Retrieving and Visualizing Training Metrics + +```python +from focoos.utils.metrics import MetricsVisualizer + +metrics = model.metrics() +visualizer = MetricsVisualizer(metrics) +visualizer.log_metrics() +``` diff --git a/focoos/__init__.py b/focoos/__init__.py index 79bb2a7..5665b00 100644 --- a/focoos/__init__.py +++ b/focoos/__init__.py @@ -21,11 +21,12 @@ OnnxEngineOpts, RuntimeTypes, SystemInfo, - TraininingInfo, + TrainingInfo, TrainInstance, ) from .remote_model import RemoteModel from .runtime import ONNXRuntime, get_runtime +from .utils.logger import get_logger from .utils.system import get_system_info from .utils.vision import ( base64mask_to_mask, @@ -59,7 +60,7 @@ "OnnxEngineOpts", "RuntimeTypes", "SystemInfo", - "TraininingInfo", + "TrainingInfo", "TrainInstance", "get_system_info", "ONNXRuntime", @@ -75,4 +76,5 @@ "image_preprocess", "index_to_class", "sv_to_focoos_detections", + "get_logger", ] diff --git a/focoos/ports.py b/focoos/ports.py index 6c7bf28..dcd54e4 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -32,12 +32,13 @@ class DeploymentMode(str, Enum): class ModelStatus(str, Enum): CREATED = "CREATED" + TRAINING_STARTING = "TRAINING_STARTING" TRAINING_RUNNING = "TRAINING_RUNNING" TRAINING_ERROR = "TRAINING_ERROR" TRAINING_COMPLETED = "TRAINING_COMPLETED" TRAINING_STOPPED = "TRAINING_STOPPED" - DEPLOY_ERROR =
"DEPLOY_ERROR" DEPLOYED = "DEPLOYED" + DEPLOY_ERROR = "DEPLOY_ERROR" class DatasetLayout(str, Enum): @@ -109,7 +110,7 @@ def validate_wandb_project(cls, value): return value -class TraininingInfo(FocoosBaseModel): +class TrainingInfo(FocoosBaseModel): algorithm_name: str instance_type: Optional[str] = None volume_size: Optional[int] = 100 @@ -118,7 +119,6 @@ class TraininingInfo(FocoosBaseModel): secondary_status: Optional[str] = None failure_reason: Optional[str] = None elapsed_time: Optional[int] = None - final_metrics: Optional[list[dict]] = None status_transitions: list[dict] = [] start_time: Optional[datetime] = None end_time: Optional[datetime] = None @@ -178,7 +178,7 @@ class ModelMetadata(FocoosBaseModel): classes: Optional[list[str]] = None im_size: Optional[int] = None hyperparameters: Optional[Hyperparameters] = None - training_info: Optional[TraininingInfo] = None + training_info: Optional[TrainingInfo] = None location: Optional[str] = None dataset: Optional[DatasetInfo] = None @@ -324,3 +324,12 @@ class ModelNotFound(Exception): def __init__(self, message: str): self.message = message super().__init__(self.message) + + +class Metrics(FocoosBaseModel): + infer_metrics: list[dict] = [] + valid_metrics: list[dict] = [] + train_metrics: list[dict] = [] + iterations: Optional[int] = None + best_valid_metric: Optional[dict] = None + updated_at: Optional[datetime] = None diff --git a/focoos/remote_model.py b/focoos/remote_model.py index 320974d..e4dd030 100644 --- a/focoos/remote_model.py +++ b/focoos/remote_model.py @@ -39,10 +39,14 @@ FocoosDetections, FocoosTask, Hyperparameters, + Metrics, ModelMetadata, + ModelStatus, + TrainingInfo, TrainInstance, ) from focoos.utils.logger import get_logger +from focoos.utils.metrics import MetricsVisualizer from focoos.utils.system import HttpClient from focoos.utils.vision import focoos_detections_to_supervision, image_loader @@ -107,7 +111,6 @@ def train( self, dataset_ref: str, hyperparameters: Hyperparameters, - anyma_version: str = "anyma-sagemaker-cu12-torch22-0111", instance_type: TrainInstance = TrainInstance.ML_G4DN_XLARGE, volume_size: int = 50, max_runtime_in_seconds: int = 36000, @@ -137,7 +140,6 @@ def train( f"models/{self.model_ref}/train", data={ "dataset_ref": dataset_ref, - "anyma_version": anyma_version, "instance_type": instance_type, "volume_size": volume_size, "max_runtime_in_seconds": max_runtime_in_seconds, @@ -149,7 +151,7 @@ def train( raise ValueError(f"Failed to train model: {res.status_code} {res.text}") return res.json() - def train_status(self) -> dict | None: + def train_info(self) -> Optional[TrainingInfo]: """ Retrieve the current status of the model training. @@ -165,7 +167,7 @@ def train_status(self) -> dict | None: if res.status_code != 200: logger.error(f"Failed to get train status: {res.status_code} {res.text}") raise ValueError(f"Failed to get train status: {res.status_code} {res.text}") - return res.json() + return TrainingInfo(**res.json()) def train_logs(self) -> list[str]: """ @@ -187,6 +189,26 @@ def train_logs(self) -> list[str]: return [] return res.json() + def metrics(self) -> Metrics: # noqa: F821 + """ + Retrieve the metrics of the model. + + This method sends a request to fetch the metrics of the model identified by `model_ref`. + If the request is successful (status code 200), it returns the metrics as a `Metrics` object. + If the request fails, it logs a warning and returns an empty `Metrics` object. + + Returns: + Metrics: An object containing the metrics of the model. 
+ + Raises: + None: Returns an empty `Metrics` object if the request fails. + """ + res = self.http_client.get(f"models/{self.model_ref}/metrics") + if res.status_code != 200: + logger.warning(f"Failed to get metrics: {res.status_code} {res.text}") + return Metrics() # noqa: F821 + return Metrics(**res.json()) + def _annotate(self, im: np.ndarray, detections: sv.Detections) -> np.ndarray: """ Annotate an image with detection results. @@ -284,115 +306,73 @@ def infer( logger.error(f"Failed to infer: {res.status_code} {res.text}") raise ValueError(f"Failed to infer: {res.status_code} {res.text}") - def train_metrics(self, period=60) -> dict | None: - """ - Retrieve training metrics for the model over a specified period. - - This method fetches the training metrics for the remote model, including aggregated values, - such as average performance metrics over the given period. - - Args: - period (int, optional): The period (in seconds) for which to fetch the metrics. Defaults to 60. - - Returns: - Optional[dict]: A dictionary containing the training metrics if the request is successful, - or None if the request fails. - """ - res = self.http_client.get( - f"models/{self.model_ref}/train/all-metrics?period={period}&aggregation_type=Average" - ) - if res.status_code != 200: - logger.warning(f"Failed to get train logs: {res.status_code} {res.text}") - return None - return res.json() - - def _log_metrics(self): + def notebook_monitor_train(self, interval: int = 30, plot_metrics: bool = False, max_runtime: int = 36000) -> None: """ - Log the latest training metrics for the model. + Monitor the training process in a Jupyter notebook and display metrics. - This method retrieves the current training metrics, such as iteration, total loss, and evaluation - metrics (like mIoU for segmentation tasks or AP50 for detection tasks). It logs the most recent values - for these metrics, helping monitor the model's training progress. - - The logged metrics depend on the model's task: - - For segmentation tasks (SEMSEG), the mean Intersection over Union (mIoU) is logged. - - For detection tasks, the Average Precision at 50% IoU (AP50) is logged. - - Returns: - None: The method only logs the metrics without returning any value. - - Logs: - - Iteration number. - - Total loss value. - - Relevant evaluation metric (mIoU or AP50). - """ - metrics = self.train_metrics() - if metrics: - iter = metrics["iter"][-1] if "iter" in metrics and len(metrics["iter"]) > 0 else -1 - total_loss = metrics["total_loss"][-1] if "total_loss" in metrics and len(metrics["total_loss"]) > 0 else -1 - if self.metadata.task == FocoosTask.SEMSEG: - accuracy = metrics["mIoU"][-1] if "mIoU" in metrics and len(metrics["mIoU"]) > 0 else "-" - eval_metric = "mIoU" - else: - accuracy = metrics["AP50"][-1] if "AP50" in metrics and len(metrics["AP50"]) > 0 else "-" - eval_metric = "AP50" - logger.info(f"Iter {iter:.0f}: Loss {total_loss:.2f}, {eval_metric} {accuracy}") - - def monitor_train(self, update_period=30) -> None: - """ - Monitor the training process of the model and log its status periodically. - - This method continuously checks the model's training status and logs updates based on the current state. - It monitors the primary and secondary statuses of the model, and performs the following actions: - - If the status is "Pending", it logs a waiting message and waits for resources. - - If the status is "InProgress", it logs the current status and elapsed time, and logs the training metrics if the model is actively training. 
- - If the status is "Completed", it logs the final metrics and exits. - - If the training fails, is stopped, or any unexpected status occurs, it logs the status and exits. + Periodically checks the training status and displays metrics in a notebook cell. + Clears previous output to maintain a clean view. Args: - update_period (int, optional): The time (in seconds) to wait between status checks. Default is 30 seconds. + interval (int): Time between status checks in seconds. Must be 30-240. Default: 30 + plot_metrics (bool): Whether to plot metrics graphs. Default: False + max_runtime (int): Maximum monitoring time in seconds. Default: 36000 (10 hours) Returns: - None: This method does not return any value but logs information about the training process. - - Logs: - - The current training status, including elapsed time. - - Training metrics at regular intervals while the model is training. + None """ - completed_status = ["Completed", "Failed", "Stopped"] - # init to make do-while - status = {"main_status": "Flag", "secondary_status": "Flag"} - prev_status = status - while status["main_status"] not in completed_status: - prev_status = status - status = self.train_status() - elapsed = status.get("elapsed_time", 0) - # Model at the startup - if not status["main_status"] or status["main_status"] in ["Pending"]: - if prev_status["main_status"] != status["main_status"]: - logger.info("[0s] Waiting for resources...") - sleep(update_period) - continue - # Training in progress - if status["main_status"] in ["InProgress"]: - if prev_status["secondary_status"] != status["secondary_status"]: - if status["secondary_status"] in ["Starting", "Pending"]: - logger.info(f"[0s] {status['main_status']}: {status['secondary_status']}") - else: - logger.info( - f"[{elapsed // 60}m:{elapsed % 60}s] {status['main_status']}: {status['secondary_status']}" - ) - if status["secondary_status"] in ["Training"]: - self._log_metrics() - sleep(update_period) - continue - if status["main_status"] == "Completed": - self._log_metrics() + from IPython.display import clear_output + + if not 30 <= interval <= 240: + raise ValueError("Interval must be between 30 and 240 seconds") + + last_update = self.get_info().updated_at + start_time = time.time() + status_history = [] + + while True: + # Get current status + model_info = self.get_info() + status = model_info.status + + # Clear and display status + clear_output(wait=True) + status_msg = f"[Live Monitor {self.metadata.name}] {status.value}" + status_history.append(status_msg) + for msg in status_history: + logger.info(msg) + + # Show metrics if training completed + if status == ModelStatus.TRAINING_COMPLETED: + metrics = self.metrics() + if metrics.best_valid_metric: + logger.info(f"Best Checkpoint (iter: {metrics.best_valid_metric.get('iteration', 'N/A')}):") + for k, v in metrics.best_valid_metric.items(): + logger.info(f" {k}: {v}") + visualizer = MetricsVisualizer(metrics) + visualizer.log_metrics() + if plot_metrics: + visualizer.notebook_plot_training_metrics() + + # Update metrics during training + if status == ModelStatus.TRAINING_RUNNING and model_info.updated_at > last_update: + last_update = model_info.updated_at + metrics = self.metrics() + visualizer = MetricsVisualizer(metrics) + visualizer.log_metrics() + if plot_metrics: + visualizer.notebook_plot_training_metrics() + + # Check exit conditions + if status not in [ModelStatus.CREATED, ModelStatus.TRAINING_RUNNING, ModelStatus.TRAINING_STARTING]: return - else: - logger.info(f"Model is not training, status: 
{status['main_status']}") + + if time.time() - start_time > max_runtime: + logger.warning(f"Monitoring exceeded {max_runtime} seconds limit") return + sleep(interval) + def stop_training(self) -> None: """ Stop the training process of the model. diff --git a/focoos/utils/metrics.py b/focoos/utils/metrics.py new file mode 100644 index 0000000..7361039 --- /dev/null +++ b/focoos/utils/metrics.py @@ -0,0 +1,138 @@ +from colorama import Fore, Style + +from focoos.ports import Metrics + + +class MetricsVisualizer: + def __init__(self, metrics: Metrics): + self.metrics = metrics + self.green_up = f"{Fore.GREEN}(↑){Style.RESET_ALL}" + self.red_down = f"{Fore.RED}(↓){Style.RESET_ALL}" + + def log_metrics(self): + def format_time_metrics(iteration, metric, is_valid=False, is_best=False): + color = Fore.GREEN if is_valid else Fore.BLUE + star = f"{Fore.YELLOW}*{Style.RESET_ALL}" if is_best else "" + text = f"{color}[iter {iteration} {'valid' if is_valid else ''}]{star}{Style.RESET_ALL}: " + metric_text = [] + for key, value in metric.items(): + if key not in ["device", "engine", "is_valid"]: + metric_text.append(f"{key}: {value}") + return text + " ".join(metric_text) + + def format_infer_metrics(metric): + color = Fore.BLUE + text = f"{color}[device={metric.get('device')}, engine={metric.get('engine')}]{Style.RESET_ALL}: " + for key, value in metric.items(): + if key != "device" and key != "engine": + text += f"{key}: {value} " + return text + + if self.metrics.train_metrics: + print(f"{Fore.YELLOW}[Training metrics]{Style.RESET_ALL}") + for item in self.metrics.valid_metrics: + item["is_valid"] = True + ordered_metrics = sorted( + self.metrics.train_metrics + self.metrics.valid_metrics, key=lambda x: int(x.get("iteration", -1)) + ) + previous_valid_metric = None + for metric in ordered_metrics: + iter = metric["iteration"] + is_valid = metric.get("is_valid", False) + is_best = self.metrics.best_valid_metric and self.metrics.best_valid_metric["iteration"] == iter + text = format_time_metrics(iter, metric, is_valid, is_best) # type: ignore + + if is_valid and previous_valid_metric: + for key, value in metric.items(): + if key in previous_valid_metric and key not in ["device", "engine", "is_valid", "iteration"]: + prev_value = previous_valid_metric[key] + if isinstance(value, (int, float)) and isinstance(prev_value, (int, float)): + if value > prev_value: + text = text.replace(f"{key}: {value}", f"{key}: {value} {self.green_up}") + elif value < prev_value: + text = text.replace(f"{key}: {value}", f"{key}: {value} {self.red_down}") + previous_valid_metric = metric + elif is_valid: + previous_valid_metric = metric + print(text) + + if self.metrics.infer_metrics: + print(f"{Fore.YELLOW}[Inference metrics]{Style.RESET_ALL}") + for metric in self.metrics.infer_metrics: + text = format_infer_metrics(metric) + print(text) + + def notebook_plot_training_metrics(self): + """ + Plots training and validation metrics on a grid. + Each key containing 'loss' is plotted as a separate line, excluding 'total_loss'. + 'total_loss' and valid_metrics are plotted on separate graphs. 
+ """ + import matplotlib.pyplot as plt + + def plot_on_axis(ax, x_values, y_values_dict, xlabel, ylabel, title): + for label, y_values in y_values_dict.items(): + ax.plot(x_values, y_values, linestyle="-", label=label) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + ax.legend() + ax.grid(True) + + if not self.metrics.train_metrics: + print("No training metrics available to plot.") + return + + iterations = [metric["iteration"] for metric in self.metrics.train_metrics] + loss_keys = [key for key in self.metrics.train_metrics[0].keys() if "loss" in key and key != "total_loss"] + losses_dict = { + loss_key: [metric[loss_key] for metric in self.metrics.train_metrics if loss_key in metric] + for loss_key in loss_keys + } + + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(30, 12)) + + plot_on_axis(ax1, iterations, losses_dict, "Iterations", "Loss", "Training Metrics: Losses vs Iterations") + + if "total_loss" in self.metrics.train_metrics[0]: + total_losses = [metric["total_loss"] for metric in self.metrics.train_metrics if "total_loss" in metric] + plot_on_axis( + ax2, + iterations, + {"total_loss": total_losses}, + "Iterations", + "Total Loss", + "Training Metrics: Total Loss vs Iterations", + ) + + if self.metrics.valid_metrics: + val_keys_start = ["bbox/", "sem_seg/", "segm/", "panoptic_seg/PQ"] + valid_iterations = [metric["iteration"] for metric in self.metrics.valid_metrics] + valid_keys = [ + key + for key in self.metrics.valid_metrics[0].keys() + if any(key.startswith(prefix) for prefix in val_keys_start) + ] + valid_values_dict = { + valid_key: [metric[valid_key] for metric in self.metrics.valid_metrics if valid_key in metric] + for valid_key in valid_keys + } + plot_on_axis( + ax3, + valid_iterations, + valid_values_dict, + "Iterations", + "Validation Metrics", + "Validation Metrics vs Iterations", + ) + if self.metrics.infer_metrics: + infer_fps = [metric["fps"] for metric in self.metrics.infer_metrics if "fps" in metric] + if infer_fps: + ax4.bar(range(len(infer_fps)), infer_fps, color="skyblue") + ax4.set_xlabel("Inference Sample") + ax4.set_ylabel("FPS") + ax4.set_title("Inference Performance (FPS)") + ax4.grid(True, axis="y") + + plt.tight_layout() + plt.show() diff --git a/notebooks/playground.ipynb b/notebooks/playground.ipynb index 186ed49..b353968 100644 --- a/notebooks/playground.ipynb +++ b/notebooks/playground.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -e ..[dev,gpu]" + "%uv pip install -e ..[dev,gpu]" ] }, { @@ -118,11 +118,9 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import Focoos\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", - "focoos = Focoos(\n", - " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", - ")\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "model_ref = \"focoos_object365\"\n", "\n", @@ -266,7 +264,7 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", + "model = focoos.get_remote_model(os.getenv(\"FOCOOS_MODEL_REF\"))\n", "model_info = model.get_info()" ] }, @@ -295,7 +293,7 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", + "model = focoos.get_remote_model(os.getenv(\"FOCOOS_MODEL_REF\"))\n", "model_info = model.get_info()\n", "image_path = 
\"./assets/aquarium.jpg\"\n", "\n", @@ -366,24 +364,13 @@ "import os\n", "from pprint import pprint\n", "\n", - "from dotenv import load_dotenv\n", - "\n", "from focoos import DEV_API_URL, Focoos\n", "\n", - "load_dotenv()\n", - "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "models = focoos.list_models()\n", - "pprint(models)\n", - "\n", - "model = focoos.new_model(name=\"test-model-2\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", - "\n", - "\n", - "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", - "\n", "datasets = focoos.list_shared_datasets()\n", - "pprint(datasets)" + "pprint(datasets)\n", + "dataset_ref = \"f92ac3b33b284a80\"" ] }, { @@ -392,25 +379,32 @@ "metadata": {}, "outputs": [], "source": [ - "from focoos.ports import Hyperparameters, TrainInstance\n", + "import os\n", "\n", - "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", + "from focoos.ports import Hyperparameters, TrainInstance\n", "\n", + "model = focoos.new_model(name=\"test-model-4\", focoos_model=\"focoos_coco_tiny\", description=\"Test model 4\")\n", + "model_ref = model.model_ref\n", "res = model.train(\n", - " anyma_version=\"0.11.1\",\n", - " dataset_ref=\"11e80dd77806450f\",\n", + " dataset_ref=dataset_ref,\n", " instance_type=TrainInstance.ML_G4DN_XLARGE,\n", " volume_size=50,\n", " max_runtime_in_seconds=36000,\n", " hyperparameters=Hyperparameters(\n", " learning_rate=0.0001,\n", - " batch_size=16,\n", - " max_iters=1500,\n", + " batch_size=32,\n", + " max_iters=5000,\n", " eval_period=100,\n", " resolution=640,\n", " ), # type: ignore\n", - ")\n", - "pprint(res)" + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Monitor Training" ] }, { @@ -419,19 +413,13 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", + "import os\n", "\n", - "from focoos.utils.logger import get_logger\n", + "from focoos import DEV_API_URL, Focoos\n", "\n", - "completed_status = [\"Completed\", \"Failed\"]\n", - "logger = get_logger(__name__)\n", - "model = focoos.get_remote_model(\"fa94df6806c84c11\")\n", - "status = model.train_status()\n", - "while status[\"main_status\"] not in completed_status:\n", - " status = model.train_status()\n", - " logger.info(f\"Training status: {status['main_status']}\")\n", - " pprint(f\"Training progress: {status['status_transitions']}\")\n", - " time.sleep(30)" + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "model = focoos.get_remote_model(\"focoos_object365\")\n", + "model.notebook_monitor_train(interval=30, plot_metrics=True)" ] }, { @@ -487,6 +475,52 @@ "system_info.pretty_print()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Metrics\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pprint import pprint\n", + "\n", + "from focoos import LOCAL_API_URL, Focoos\n", + "\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=LOCAL_API_URL)\n", + "\n", + "model = focoos.get_remote_model(os.getenv(\"FOCOOS_MODEL_REF\"))\n", + "metrics = model.metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize Metrics\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from focoos.utils.metrics import MetricsVisualizer\n", + "\n", + "metrics_visualizer = 
MetricsVisualizer(metrics)\n", + "metrics_visualizer.log_metrics()\n", + "metrics_visualizer.notebook_plot_training_metrics()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/pyproject.toml b/pyproject.toml index c0351e0..f183cbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,9 @@ dependencies = [ "gputil~=1.4.0", "psutil~=6.1.1", "setuptools~=75.7.0", + "matplotlib~=3.10.0", + "colorama~=0.4.6", + "ipython~=8.31.0", ] authors = [{ name = "focoos.ai", email = "info@focoos.ai" }] diff --git a/tests/test_remote_model.py b/tests/test_remote_model.py index 7bb96ea..005ff1e 100644 --- a/tests/test_remote_model.py +++ b/tests/test_remote_model.py @@ -5,7 +5,7 @@ from pytest_mock import MockerFixture import tests -from focoos.ports import FocoosTask, Hyperparameters, ModelMetadata, TrainInstance +from focoos.ports import FocoosTask, Hyperparameters, Metrics, ModelMetadata, ModelStatus, TrainingInfo, TrainInstance from focoos.remote_model import RemoteModel @@ -59,16 +59,29 @@ def test_remote_model_initialization_ok( def test_train_status_fail(mock_remote_model: RemoteModel): with pytest.raises(ValueError): mock_remote_model.http_client.get = MagicMock(return_value=MagicMock(status_code=500)) - mock_remote_model.train_status() + mock_remote_model.train_info() def test_train_status_ok(mock_remote_model: RemoteModel): with tests.not_raises(Exception): mock_remote_model.http_client.get = MagicMock( - return_value=MagicMock(status_code=200, json=MagicMock(return_value={"status": "training"})) + return_value=MagicMock( + status_code=200, + json=MagicMock( + return_value={ + "algorithm_name": "anyma0.12.7", + "instance_type": "ml.g4dn.xlarge", + "volume_size": 100, + } + ), + ) + ) + result = mock_remote_model.train_info() + assert result == TrainingInfo( + algorithm_name="anyma0.12.7", + instance_type="ml.g4dn.xlarge", + volume_size=100, ) - result = mock_remote_model.train_status() - assert result == {"status": "training"} def test_train_logs_fail(mock_remote_model: RemoteModel): @@ -112,8 +125,8 @@ def test_delete_model_ok(mock_remote_model: RemoteModel): def test_train_metrics_fail(mock_remote_model: RemoteModel): mock_remote_model.http_client.get = MagicMock(return_value=MagicMock(status_code=500, text="Internal Server Error")) - result = mock_remote_model.train_metrics() - assert result is None + result = mock_remote_model.metrics() + assert result == Metrics() def test_train_metrics_ok(mock_remote_model: RemoteModel): @@ -121,11 +134,13 @@ def test_train_metrics_ok(mock_remote_model: RemoteModel): mock_remote_model.http_client.get = MagicMock( return_value=MagicMock( status_code=200, - json=MagicMock(return_value={"accuracy": 0.95, "loss": 0.1}), + json=MagicMock(return_value={"train_metrics": [{"iteration": 1, "loss": 0.1, "sem_seg/mIoU": 0.95}]}), ) ) - result = mock_remote_model.train_metrics() - assert result == {"accuracy": 0.95, "loss": 0.1} + result = mock_remote_model.metrics() + assert result == Metrics( + train_metrics=[{"iteration": 1, "loss": 0.1, "sem_seg/mIoU": 0.95}], + ) @pytest.fixture() @@ -145,7 +160,6 @@ def test_train_fail( mock_remote_model.train( dataset_ref="dataset_123", hyperparameters=mock_hyperparameters, - anyma_version="anyma-sagemaker-cu12-torch22-0111", instance_type=TrainInstance.ML_G4DN_XLARGE, volume_size=50, max_runtime_in_seconds=36000, @@ -167,7 +181,6 @@ def test_train_ok(mock_remote_model: RemoteModel, mock_hyperparameters: Hyperpar result = mock_remote_model.train( dataset_ref="dataset_123", 
hyperparameters=mock_hyperparameters, - anyma_version="anyma-sagemaker-cu12-torch22-0111", instance_type=TrainInstance.ML_G4DN_XLARGE, volume_size=50, max_runtime_in_seconds=36000, @@ -175,64 +188,218 @@ def test_train_ok(mock_remote_model: RemoteModel, mock_hyperparameters: Hyperpar assert result == {"status": "training started", "model_ref": "model_123"} -def test_log_metrics_semseg(mock_remote_model: RemoteModel, mocker): +def test_metrics_semseg(mock_remote_model: RemoteModel, mocker): mocker.patch.object( mock_remote_model, - "train_metrics", - return_value={ - "iter": [1, 2, 3], - "total_loss": [0.5, 0.4, 0.3], - "mIoU": [0.7, 0.8, 0.85], - }, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 2, "loss": 0.01, "sem_seg/mIoU": 0.90}], + valid_metrics=[{"iteration": 3, "loss": 0.1, "sem_seg/mIoU": 0.95}], + best_valid_metric={"iteration": 3, "loss": 0.1, "sem_seg/mIoU": 0.95}, + ), ) mock_remote_model.metadata.task = FocoosTask.SEMSEG - mock_logger = mocker.patch("focoos.remote_model.logger.info") - mock_remote_model._log_metrics() + metrics = mock_remote_model.metrics() + assert isinstance(metrics, Metrics) + assert metrics.best_valid_metric == {"iteration": 3, "loss": 0.1, "sem_seg/mIoU": 0.95} + assert metrics.train_metrics == [{"iteration": 2, "loss": 0.01, "sem_seg/mIoU": 0.90}] + assert metrics.valid_metrics == [{"iteration": 3, "loss": 0.1, "sem_seg/mIoU": 0.95}] + assert metrics.infer_metrics == [] - mock_logger.assert_called_once_with("Iter 3: Loss 0.30, mIoU 0.85") - -def test_log_metrics_detection(mock_remote_model: RemoteModel, mocker): +def test_metrics_detection(mock_remote_model: RemoteModel, mocker): mocker.patch.object( mock_remote_model, - "train_metrics", - return_value={ - "iter": [1, 2, 3], - "total_loss": [0.6, 0.5, 0.4], - "AP50": [0.75, 0.8, 0.82], - }, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6, "bbox/AP50": 0.75}], + valid_metrics=[{"iteration": 1, "loss": 0.5, "bbox/AP50": 0.8}], + best_valid_metric={"iteration": 1, "loss": 0.4, "bbox/AP50": 0.82}, + ), ) mock_remote_model.metadata.task = FocoosTask.DETECTION - mock_logger = mocker.patch("focoos.remote_model.logger.info") - mock_remote_model._log_metrics() + metrics = mock_remote_model.metrics() + assert metrics.best_valid_metric == {"iteration": 1, "loss": 0.4, "bbox/AP50": 0.82} + assert metrics.train_metrics == [{"iteration": 1, "loss": 0.6, "bbox/AP50": 0.75}] + assert metrics.valid_metrics == [{"iteration": 1, "loss": 0.5, "bbox/AP50": 0.8}] + assert metrics.infer_metrics == [] - mock_logger.assert_called_once_with("Iter 3: Loss 0.40, AP50 0.82") +def test_notebook_monitor_train_invalid_interval(mock_remote_model: RemoteModel): + """Test that monitor_train raises ValueError for invalid intervals.""" + with pytest.raises(ValueError, match="Interval must be between 30 and 240 seconds"): + mock_remote_model.notebook_monitor_train(interval=20) + with pytest.raises(ValueError, match="Interval must be between 30 and 240 seconds"): + mock_remote_model.notebook_monitor_train(interval=250) + + +def test_notebook_monitor_train_completed(mock_remote_model: RemoteModel, mocker): + """Test monitoring when training completes successfully.""" + # Mock time functions + mocker.patch("time.time", return_value=1000) + mock_sleep = mocker.patch("time.sleep") + mock_clear = mocker.patch("IPython.display.clear_output") + + # Mock model info and metrics + mocker.patch.object( + mock_remote_model, "get_info", return_value=mocker.Mock(status=ModelStatus.TRAINING_COMPLETED, 
updated_at=1000) + ) + mocker.patch.object( + mock_remote_model, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6}], + valid_metrics=[{"iteration": 1, "loss": 0.5}], + best_valid_metric={"iteration": 1, "loss": 0.5}, + infer_metrics=[], + ), + ) + + mock_remote_model.notebook_monitor_train(interval=30, plot_metrics=False) -def test_log_metrics_empty_metrics(mock_remote_model: RemoteModel, mocker): - mocker.patch.object(mock_remote_model, "train_metrics", return_value=None) - mock_logger = mocker.patch("focoos.remote_model.logger.info") + assert mock_clear.called + assert not mock_sleep.called - mock_remote_model._log_metrics() - mock_logger.assert_not_called() +def test_notebook_monitor_train_running(mock_remote_model: RemoteModel, mocker): + """Test monitoring during active training.""" + # Mock time functions + mocker.patch( + "time.time", + side_effect=[1000 + i * 30 for i in range(10)], # Provide enough values for all time.time() calls + ) + mock_sleep = mocker.patch("focoos.remote_model.sleep") + # mock_time = mocker.patch("focoos.remote_model.time") + mock_clear = mocker.patch("IPython.display.clear_output") + # Mock model info with different states -def test_log_metrics_missing_keys(mock_remote_model: RemoteModel, mocker): mocker.patch.object( mock_remote_model, - "train_metrics", - return_value={ - "iter": [1, 2, 3], - # 'total_loss' key is missing - "AP50": [0.75, 0.8, 0.82], + "get_info", + side_effect=[ + mocker.Mock(status=ModelStatus.TRAINING_RUNNING, updated_at=1001), + mocker.Mock(status=ModelStatus.TRAINING_RUNNING, updated_at=1002), + mocker.Mock(status=ModelStatus.TRAINING_COMPLETED, updated_at=1003), + ], + ) + mocker.patch.object( + mock_remote_model, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6}], + valid_metrics=[{"iteration": 1, "loss": 0.5}], + best_valid_metric={"iteration": 1, "loss": 0.5, "sem_seg/mIoU": 0.95}, + infer_metrics=[], + ), + ) + + mock_remote_model.notebook_monitor_train(interval=30, plot_metrics=False) + print(f"sleep call count: {mock_sleep.call_count}") + assert mock_clear.called + assert mock_clear.call_count == 2 + assert mock_sleep.call_count == 1 + + assert mock_sleep.call_args == mocker.call(30) + + +def test_notebook_monitor_train_max_runtime(mock_remote_model: RemoteModel, mocker): + """Test that monitoring stops when max runtime is exceeded.""" + # Mock time module + mocker.patch( + "focoos.remote_model.time", + **{ + "time": mocker.Mock(side_effect=[1000, 40000]), # First call for start_time, second for check + "sleep": mocker.Mock(), # Prevent actual sleeping }, ) - mock_remote_model.metadata.task = FocoosTask.DETECTION - mock_logger = mocker.patch("focoos.remote_model.logger.info") - mock_remote_model._log_metrics() + mock_clear = mocker.patch("IPython.display.clear_output") + mock_logger = mocker.patch("logging.Logger.warning") + + # Create a mock that always returns TRAINING_RUNNING + mock_info = mocker.Mock() + mock_info.status = ModelStatus.TRAINING_RUNNING + mock_info.updated_at = 1000 + + mocker.patch.object( + mock_remote_model, + "get_info", + return_value=mock_info, # Always return the same mock object + ) + + mocker.patch.object( + mock_remote_model, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6}], + valid_metrics=[], + best_valid_metric=None, + infer_metrics=[], + ), + ) + + mock_remote_model.notebook_monitor_train(interval=30, max_runtime=36000) + assert mock_clear.called + + # Verify that warning was logged + 
mock_logger.assert_called_with("Monitoring exceeded 36000 seconds limit") + + +def test_notebook_monitor_train_with_metrics_plot(mock_remote_model: RemoteModel, mocker): + """Test monitoring with metrics plotting enabled.""" + mocker.patch("time.time", return_value=1000) + mock_clear = mocker.patch("IPython.display.clear_output") + mock_plot = mocker.patch("focoos.utils.metrics.MetricsVisualizer.notebook_plot_training_metrics") + + mocker.patch.object( + mock_remote_model, + "get_info", + return_value=mocker.Mock(status=ModelStatus.TRAINING_COMPLETED, updated_at=1000), + ) + mocker.patch.object( + mock_remote_model, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6}], + valid_metrics=[{"iteration": 1, "loss": 0.5}], + best_valid_metric={"iteration": 1, "loss": 0.5}, + infer_metrics=[], + ), + ) + mocker.patch.object( + mock_remote_model, + "metrics", + return_value=Metrics( + train_metrics=[{"iteration": 1, "loss": 0.6}], + valid_metrics=[{"iteration": 1, "loss": 0.5}], + best_valid_metric={"iteration": 1, "loss": 0.5, "sem_seg/mIoU": 0.95}, + infer_metrics=[], + ), + ) + + mock_remote_model.notebook_monitor_train(interval=30, plot_metrics=True) + + assert mock_plot.called + assert mock_clear.called + + +def test_notebook_monitor_train_status_history(mock_remote_model: RemoteModel, mocker): + """Test status history logging during monitoring.""" + mocker.patch("time.time", return_value=1000) + mocker.patch("IPython.display.clear_output") + mock_logger = mocker.patch("logging.Logger.info") + + mocker.patch.object( + mock_remote_model, + "get_info", + return_value=mocker.Mock(status=ModelStatus.TRAINING_COMPLETED, updated_at=1000), + ) + mock_remote_model.metadata.name = "test_model" + + mock_remote_model.notebook_monitor_train(interval=30) - mock_logger.assert_called_once_with("Iter 3: Loss -1.00, AP50 0.82") + expected_msg = f"[Live Monitor test_model] {ModelStatus.TRAINING_COMPLETED.value}" + mock_logger.assert_any_call(expected_msg) diff --git a/tests/utils/test_metrics.py b/tests/utils/test_metrics.py new file mode 100644 index 0000000..e070f42 --- /dev/null +++ b/tests/utils/test_metrics.py @@ -0,0 +1,103 @@ +from unittest.mock import Mock, patch + +import pytest +from colorama import Fore, Style + +from focoos.ports import Metrics +from focoos.utils.metrics import MetricsVisualizer + + +@pytest.fixture +def sample_metrics(): + metrics = Mock(spec=Metrics) + metrics.train_metrics = [ + {"iteration": 1, "loss_1": 0.5, "total_loss": 1.0}, + {"iteration": 2, "loss_1": 0.3, "total_loss": 0.8}, + ] + metrics.valid_metrics = [ + {"iteration": 1, "bbox/AP": 0.6, "bbox/AP50": 0.9, "is_valid": True}, + {"iteration": 2, "bbox/AP": 0.7, "bbox/AP50": 0.8, "is_valid": True}, + ] + metrics.infer_metrics = [ + {"device": "cuda", "engine": "tensorrt", "fps": 30.5}, + {"device": "cuda", "engine": "tensorrt", "fps": 31.0}, + ] + metrics.best_valid_metric = {"iteration": 2, "bbox/AP": 0.7, "bbox/AP50": 0.8} + return metrics + + +@pytest.fixture +def metrics_visualizer(sample_metrics): + return MetricsVisualizer(sample_metrics) + + +def test_init(metrics_visualizer): + """Test the initialization of MetricsVisualizer.""" + assert isinstance(metrics_visualizer, MetricsVisualizer) + assert metrics_visualizer.green_up == f"{Fore.GREEN}(↑){Style.RESET_ALL}" + assert metrics_visualizer.red_down == f"{Fore.RED}(↓){Style.RESET_ALL}" + + +@patch("builtins.print") +def test_log_metrics_training(mock_print, metrics_visualizer): + """Test logging of training metrics.""" + 
metrics_visualizer.log_metrics() + mock_print.assert_any_call(f"{Fore.YELLOW}[Training metrics]{Style.RESET_ALL}") + + +@patch("builtins.print") +def test_log_metrics_inference(mock_print, metrics_visualizer): + """Test logging of inference metrics.""" + metrics_visualizer.log_metrics() + mock_print.assert_any_call(f"{Fore.YELLOW}[Inference metrics]{Style.RESET_ALL}") + + +def test_notebook_plot_training_metrics(metrics_visualizer): + """Test plotting functionality.""" + with patch("matplotlib.pyplot.show") as mock_show: + metrics_visualizer.notebook_plot_training_metrics() + mock_show.assert_called_once() + + +def test_notebook_plot_no_metrics(): + """Test plotting behavior when no metrics are available.""" + empty_metrics = Mock(spec=Metrics) + empty_metrics.train_metrics = [] + empty_metrics.valid_metrics = [] + empty_metrics.infer_metrics = [] + visualizer = MetricsVisualizer(empty_metrics) + + with patch("builtins.print") as mock_print: + visualizer.notebook_plot_training_metrics() + mock_print.assert_called_once_with("No training metrics available to plot.") + + +def test_metrics_comparison(metrics_visualizer): + """Test metrics comparison and arrow indicators.""" + with patch("builtins.print") as mock_print: + metrics_visualizer.log_metrics() + # Verify that print was called at least once + assert mock_print.called + + +@pytest.mark.parametrize( + "metrics_data", + [ + { + "train_metrics": [{"iteration": 1, "loss": 0.5}], + "valid_metrics": [], + "infer_metrics": [], + "best_valid_metric": {"iteration": 1, "loss": 0.5}, + }, + {"train_metrics": [], "valid_metrics": [], "infer_metrics": [{"device": "cuda", "fps": 30.0}]}, + ], +) +def test_metrics_visualizer_edge_cases(metrics_data): + """Test MetricsVisualizer with various edge cases.""" + metrics = Mock(spec=Metrics) + for key, value in metrics_data.items(): + setattr(metrics, key, value) + visualizer = MetricsVisualizer(metrics) + with patch("builtins.print") as mock_print: + visualizer.log_metrics() + assert mock_print.called