From e9412559bcf5e01b31ffc386e755cd016e10700b Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 13:49:51 +0000 Subject: [PATCH 01/31] fix: unet from config warning --- .../image_generation/sdxl_lightning_image_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datadreamer/image_generation/sdxl_lightning_image_generator.py b/datadreamer/image_generation/sdxl_lightning_image_generator.py index 33c5141..f5bf977 100644 --- a/datadreamer/image_generation/sdxl_lightning_image_generator.py +++ b/datadreamer/image_generation/sdxl_lightning_image_generator.py @@ -46,16 +46,17 @@ def _init_gen_model(self): base = "stabilityai/stable-diffusion-xl-base-1.0" repo = "ByteDance/SDXL-Lightning" ckpt = "sdxl_lightning_4step_unet.safetensors" # Use the correct ckpt for your step setting! + config = UNet2DConditionModel.load_config(base, subfolder="unet") # Load model. if self.device == "cpu": print("Loading SDXL Lightning on CPU...") - unet = UNet2DConditionModel.from_config(base, subfolder="unet") + unet = UNet2DConditionModel.from_config(config) unet.load_state_dict(load_file(hf_hub_download(repo, ckpt))) pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet) else: print("Loading SDXL Lightning on GPU...") - unet = UNet2DConditionModel.from_config(base, subfolder="unet").to( + unet = UNet2DConditionModel.from_config(config).to( self.device, torch.float16 ) unet.load_state_dict( From 6481f04d70ba3763447d0ae8593d69981dd5a202 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 18:02:26 +0000 Subject: [PATCH 02/31] feat: add logger --- .../dataset_annotation/clip_annotator.py | 4 ++++ .../dataset_annotation/image_annotator.py | 9 ++++++--- .../dataset_annotation/owlv2_annotator.py | 5 ++++- .../image_generation/clip_image_tester.py | 4 ++++ .../image_generation/image_generator.py | 18 +++++++++--------- .../image_generation/sdxl_image_generator.py | 9 ++++++--- .../sdxl_lightning_image_generator.py | 9 
++++----- .../sdxl_turbo_image_generator.py | 6 ++++-- .../pipelines/generate_dataset_from_scratch.py | 3 +++ .../prompt_generation/lm_prompt_generator.py | 12 +++++++----- .../prompt_generation/lm_synonym_generator.py | 9 ++++++--- .../prompt_generation/synonym_generator.py | 4 +++- .../tinyllama_lm_prompt_generator.py | 7 ++++--- datadreamer/utils/luxonis_dataset_converter.py | 9 ++++++--- datadreamer/utils/merge_raw_datasets.py | 5 ++++- datadreamer/utils/nms.py | 5 ++++- .../utils/single_label_cls_converter.py | 7 +++++-- 17 files changed, 83 insertions(+), 42 deletions(-) diff --git a/datadreamer/dataset_annotation/clip_annotator.py b/datadreamer/dataset_annotation/clip_annotator.py index ff7b9aa..3c4e6cd 100644 --- a/datadreamer/dataset_annotation/clip_annotator.py +++ b/datadreamer/dataset_annotation/clip_annotator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import List import numpy as np @@ -10,6 +11,8 @@ from datadreamer.dataset_annotation.image_annotator import BaseAnnotator, TaskList +logger = logging.getLogger(__name__) + class CLIPAnnotator(BaseAnnotator): """A class for image annotation using the CLIP model, specializing in image @@ -63,6 +66,7 @@ def _init_model(self): Returns: CLIPModel: The initialized CLIP model. 
""" + logger.info(f"Initializing CLIP {self.size} model...") if self.size == "large": return CLIPModel.from_pretrained("openai/clip-vit-large-patch14") return CLIPModel.from_pretrained("openai/clip-vit-base-patch32") diff --git a/datadreamer/dataset_annotation/image_annotator.py b/datadreamer/dataset_annotation/image_annotator.py index 4479ffe..757baab 100644 --- a/datadreamer/dataset_annotation/image_annotator.py +++ b/datadreamer/dataset_annotation/image_annotator.py @@ -4,15 +4,12 @@ from abc import ABC, abstractmethod -# Enum for different labeling tasks class TaskList(enum.Enum): CLASSIFICATION = "classification" OBJECT_DETECTION = "object_detection" SEGMENTATION = "segmentation" - # Add more tasks as needed -# Abstract base class for data labeling class BaseAnnotator(ABC): """Abstract base class for creating annotators. @@ -24,6 +21,8 @@ class BaseAnnotator(ABC): Methods: annotate_batch(): Abstract method to be implemented by subclasses. It should contain the logic for performing annotation based on the task definition. + release(): Abstract method to be implemented by subclasses. It should contain + the logic for releasing the resources used by the annotator. 
""" def __init__( @@ -35,3 +34,7 @@ def __init__( @abstractmethod def annotate_batch(self): pass + + @abstractmethod + def release(self, empty_cuda_cache=False) -> None: + pass diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 25f247f..231558b 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import List, Tuple import numpy as np @@ -11,6 +12,8 @@ from datadreamer.dataset_annotation.utils import apply_tta from datadreamer.utils.nms import non_max_suppression +logger = logging.getLogger(__name__) + class OWLv2Annotator(BaseAnnotator): """A class for image annotation using the OWLv2 model, specializing in object @@ -54,6 +57,7 @@ def _init_model(self): Returns: Owlv2ForObjectDetection: The initialized OWLv2 model. """ + logger.info(f"Initializing OWLv2 {self.size} model...") if self.size == "large": return Owlv2ForObjectDetection.from_pretrained( "google/owlv2-large-patch14-ensemble" @@ -107,7 +111,6 @@ def _generate_annotations( ).to(self.device) with torch.no_grad(): outputs = self.model(**inputs) - # print(outputs) preds = self.processor.post_process_object_detection( outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold ) diff --git a/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py index 2c67965..c1bf3b6 100644 --- a/datadreamer/image_generation/clip_image_tester.py +++ b/datadreamer/image_generation/clip_image_tester.py @@ -1,11 +1,14 @@ from __future__ import annotations +import logging from typing import List import torch from PIL import Image from transformers import CLIPModel, CLIPProcessor +logger = logging.getLogger(__name__) + class ClipImageTester: """A class for testing images against a set of textual objects using the CLIP model. 
@@ -22,6 +25,7 @@ class ClipImageTester: def __init__(self, device: str = "cuda") -> None: """Initializes the ClipImageTester with the CLIP model and processor.""" + logger.info("Initializing CLIP image tester...") self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") self.clip_processor = CLIPProcessor.from_pretrained( "openai/clip-vit-base-patch32" diff --git a/datadreamer/image_generation/image_generator.py b/datadreamer/image_generation/image_generator.py index 4b01f81..3e2aff7 100644 --- a/datadreamer/image_generation/image_generator.py +++ b/datadreamer/image_generation/image_generator.py @@ -30,7 +30,7 @@ class ImageGenerator: set_seed(seed): Sets the seed for random number generators. generate_images(prompts, prompt_objects): Generates images based on provided prompts and optional object prompts. release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. (Abstract method) - generate_image(prompt, negative_prompt, prompt_objects): Generates a single image based on the provided prompt. (Abstract method) + generate_images_batch(prompts, negative_prompt, prompt_objects): Generates a batch of images based on the provided prompts. Abstract method) Note: The actual model for image generation needs to be defined in the subclass. @@ -151,20 +151,20 @@ def release(self, empty_cuda_cache=False) -> None: pass @abstractmethod - def generate_image( + def generate_images_batch( self, - prompt: str, + prompts: List[str], negative_prompt: str, - prompt_objects: Optional[List[str]] = None, - ) -> Image.Image: - """Generates a single image based on the provided prompt. + prompt_objects: Optional[List[List[str]]] = None, + ) -> List[Image.Image]: + """Generates a batch of images based on the provided prompts. Args: - prompt (str): The positive prompt to guide image generation. + prompts (List[str]): A list of positive prompts to guide image generation. negative_prompt (str): The negative prompt to avoid certain features in the image. 
- prompt_objects (Optional[List[str]]): Optional list of objects to be used in CLIP model testing. + prompt_objects (Optional[List[List[str]]]): Optional list of objects to be used in CLIP model testing. Returns: - Image.Image: The generated image. + List[Image.Image]: A list of generated images. """ pass diff --git a/datadreamer/image_generation/sdxl_image_generator.py b/datadreamer/image_generation/sdxl_image_generator.py index 1882f4a..6dc583e 100644 --- a/datadreamer/image_generation/sdxl_image_generator.py +++ b/datadreamer/image_generation/sdxl_image_generator.py @@ -1,13 +1,17 @@ from __future__ import annotations +import logging from typing import List, Optional import torch from compel import Compel, ReturnedEmbeddingsType from diffusers import DiffusionPipeline +from PIL import Image from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionImageGenerator(ImageGenerator): """A subclass of ImageGenerator that uses the Stable Diffusion model for image @@ -38,8 +42,8 @@ def _init_gen_model(self): Returns: tuple: The base and refiner models. """ + logger.info(f"Initializing SDXL on {self.device}...") if self.device == "cpu": - print("Loading SDXL on CPU...") base = DiffusionPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", # variant="fp16", @@ -57,7 +61,6 @@ def _init_gen_model(self): ) refiner.to("cpu") else: - print("Loading SDXL on GPU...") base = DiffusionPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, @@ -102,7 +105,7 @@ def generate_images_batch( prompts: List[str], negative_prompt: str, prompt_objects: Optional[List[List[str]]] = None, - ): + ) -> List[Image.Image]: """Generates a batch of images based on the provided prompts. 
Args: diff --git a/datadreamer/image_generation/sdxl_lightning_image_generator.py b/datadreamer/image_generation/sdxl_lightning_image_generator.py index f5bf977..cc43dfe 100644 --- a/datadreamer/image_generation/sdxl_lightning_image_generator.py +++ b/datadreamer/image_generation/sdxl_lightning_image_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import List, Optional import torch @@ -15,6 +16,8 @@ from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionLightningImageGenerator(ImageGenerator): """A subclass of ImageGenerator specifically designed to use the Stable Diffusion @@ -48,14 +51,12 @@ def _init_gen_model(self): ckpt = "sdxl_lightning_4step_unet.safetensors" # Use the correct ckpt for your step setting! config = UNet2DConditionModel.load_config(base, subfolder="unet") - # Load model. + logger.info(f"Initializing SDXL Lightning on {self.device}...") if self.device == "cpu": - print("Loading SDXL Lightning on CPU...") unet = UNet2DConditionModel.from_config(config) unet.load_state_dict(load_file(hf_hub_download(repo, ckpt))) pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet) else: - print("Loading SDXL Lightning on GPU...") unet = UNet2DConditionModel.from_config(config).to( self.device, torch.float16 ) @@ -93,7 +94,6 @@ def generate_images_batch( prompts: List[str], negative_prompt: str, prompt_objects: Optional[List[List[str]]] = None, - batch_size: int = 1, ) -> List[Image.Image]: """Generates a batch of images using the Stable Diffusion Lightning model based on the provided prompts. @@ -102,7 +102,6 @@ def generate_images_batch( prompts (List[str]): A list of positive prompts to guide image generation. negative_prompt (str): The negative prompt to avoid certain features in the image. prompt_objects (Optional[List[List[str]]]): Optional list of objects for each prompt for CLIP model testing. 
- batch_size (int): The number of images to generate in each batch. Returns: List[Image.Image]: A list of generated images. diff --git a/datadreamer/image_generation/sdxl_turbo_image_generator.py b/datadreamer/image_generation/sdxl_turbo_image_generator.py index e78fa17..54d9dd7 100644 --- a/datadreamer/image_generation/sdxl_turbo_image_generator.py +++ b/datadreamer/image_generation/sdxl_turbo_image_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import List, Optional import torch @@ -8,6 +9,8 @@ from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionTurboImageGenerator(ImageGenerator): """A subclass of ImageGenerator specifically designed to use the Stable Diffusion @@ -34,8 +37,8 @@ def _init_gen_model(self): Returns: AutoPipelineForText2Image: The initialized Stable Diffusion Turbo model. """ + logger.info(f"Initializing SDXL Turbo on {self.device}...") if self.device == "cpu": - print("Loading SDXL Turbo on CPU...") base = AutoPipelineForText2Image.from_pretrained( "stabilityai/sdxl-turbo", # variant="fp16", @@ -44,7 +47,6 @@ def _init_gen_model(self): ) base.to("cpu") else: - print("Loading SDXL Turbo on GPU...") base = AutoPipelineForText2Image.from_pretrained( "stabilityai/sdxl-turbo", torch_dtype=torch.float16, diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index b1aef52..03e3a7d 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -11,6 +11,7 @@ import torch from box import Box from luxonis_ml.data import DATASETS_REGISTRY, LOADERS_REGISTRY +from luxonis_ml.utils import setup_logging from PIL import Image from tqdm import tqdm @@ -50,6 +51,8 @@ det_annotators = {"owlv2": OWLv2Annotator} clf_annotators = {"clip": CLIPAnnotator} +setup_logging(use_rich=True) + def parse_args(): # 
Argument parsing diff --git a/datadreamer/prompt_generation/lm_prompt_generator.py b/datadreamer/prompt_generation/lm_prompt_generator.py index 10ca96e..adbd44c 100644 --- a/datadreamer/prompt_generation/lm_prompt_generator.py +++ b/datadreamer/prompt_generation/lm_prompt_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import random import re from typing import List, Literal, Optional @@ -16,6 +17,8 @@ from datadreamer.prompt_generation.prompt_generator import PromptGenerator +logger = logging.getLogger(__name__) + class LMPromptGenerator(PromptGenerator): """A language model-based prompt generator class, extending PromptGenerator. @@ -69,8 +72,8 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin tuple: The initialized language model, tokenizer and pipeline. """ selected_dtype = "auto" + logger.info(f"Initializing Mistral-7B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype="auto", @@ -79,7 +82,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin ) else: if self.quantization == "none": - print("Loading FP16 language model on GPU...") + logger.info("Loading FP16 language model...") selected_dtype = torch.float16 model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", @@ -88,7 +91,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, ) else: - print("Loading INT4 language model on GPU...") + logger.info("Loading INT4 language model...") # Create the BitsAndBytesConfig object with the dynamically constructed arguments bnb_config = BitsAndBytesConfig( load_in_4bit=True, @@ -115,7 +118,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, batch_size=self.batch_size, ) - print("Done!") 
return model, tokenizer, pipe def _remove_incomplete_sentence(self, text: str) -> str: @@ -219,7 +221,7 @@ def generate_prompts(self) -> List[str]: """ prompts = [] progress_bar = tqdm( - desc="Generating prompts...", position=0, total=self.prompts_number + desc="Generating prompts", position=0, total=self.prompts_number ) while len(prompts) < self.prompts_number: selected_objects_batch = [ diff --git a/datadreamer/prompt_generation/lm_synonym_generator.py b/datadreamer/prompt_generation/lm_synonym_generator.py index fc86db8..a971655 100644 --- a/datadreamer/prompt_generation/lm_synonym_generator.py +++ b/datadreamer/prompt_generation/lm_synonym_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import re from typing import List, Optional @@ -13,6 +14,8 @@ from datadreamer.prompt_generation.synonym_generator import SynonymGenerator +logger = logging.getLogger(__name__) + class LMSynonymGenerator(SynonymGenerator): """Synonym generator that generates synonyms for a list of words using a language @@ -48,8 +51,8 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin Returns: tuple: The initialized language model, tokenizer and pipeline. 
""" + logger.info(f"Initializing Mistral-7B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype="auto", @@ -57,7 +60,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin low_cpu_mem_usage=True, ) else: - print("Loading FP16 language model on GPU...") + logger.info("Loading FP16 language model...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=torch.float16, @@ -73,7 +76,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin torch_dtype=torch.float16 if self.device == "cuda" else "auto", device_map=self.device, ) - print("Done!") + logger.info("Done!") return model, tokenizer, pipe def _generate_synonyms(self, prompt_text: str) -> List[str]: diff --git a/datadreamer/prompt_generation/synonym_generator.py b/datadreamer/prompt_generation/synonym_generator.py index ec3f306..bb4240e 100644 --- a/datadreamer/prompt_generation/synonym_generator.py +++ b/datadreamer/prompt_generation/synonym_generator.py @@ -1,11 +1,14 @@ from __future__ import annotations import json +import logging from abc import ABC, abstractmethod from typing import List, Optional from tqdm import tqdm +logger = logging.getLogger(__name__) + # Abstract base class for synonym generation class SynonymGenerator(ABC): @@ -51,7 +54,6 @@ def generate_synonyms_for_list(self, words: List[str]) -> dict: for word in tqdm(words, desc="Generating synonyms"): synonyms = self.generate_synonyms(word) synonyms_dict[word] = synonyms - print("Synonyms generated") return synonyms_dict def save_synonyms(self, synonyms, save_path: str) -> None: diff --git a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py index 78238e7..ed5fdcf 100644 --- 
a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py +++ b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import re from typing import List, Literal, Optional @@ -8,6 +9,8 @@ from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator +logger = logging.getLogger(__name__) + class TinyLlamaLMPromptGenerator(LMPromptGenerator): """A language model-based prompt generator class, extending PromptGenerator. @@ -53,8 +56,8 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin Returns: tuple: The initialized language model, tokenizer and pipeline. """ + logger.info(f"Initializing TinyLlama-1.1B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype="auto", @@ -62,7 +65,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin low_cpu_mem_usage=True, ) else: - print("Loading language model on GPU...") model = AutoModelForCausalLM.from_pretrained( "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, @@ -82,7 +84,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, batch_size=self.batch_size, ) - print("Done!") return model, tokenizer, pipe def _remove_caption_sentences(self, text: str) -> str: diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index 72dc06d..9b55f79 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os from luxonis_ml.data import DATASETS_REGISTRY, LuxonisDataset @@ -8,6 +9,8 @@ from datadreamer.utils import BaseConverter +logger = logging.getLogger(__name__) + class 
LuxonisDatasetConverter(BaseConverter): """Class for converting a dataset to LuxonisDataset format.""" @@ -84,7 +87,7 @@ def dataset_generator(): # if dataset_plugin is set, use that if self.dataset_plugin: if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: - print(f"Using {self.dataset_plugin} dataset") + logger.info(f"Using {self.dataset_plugin} dataset") dataset_constructor = DATASETS_REGISTRY.get(self.dataset_plugin) dataset = dataset_constructor(dataset_name) else: @@ -96,10 +99,10 @@ def dataset_generator(): "LUXONISML_BUCKET" in os.environ and "GOOGLE_APPLICATION_CREDENTIALS" in os.environ ): - print("Using GCS bucket") + logger.info("Using GCS bucket") dataset = LuxonisDataset(dataset_name, bucket_storage=BucketStorage.GCS) else: - print("Using local dataset") + logger.info("Using local dataset") dataset = LuxonisDataset(dataset_name) dataset.add(dataset_generator()) diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py index 47c1dc0..77f7359 100644 --- a/datadreamer/utils/merge_raw_datasets.py +++ b/datadreamer/utils/merge_raw_datasets.py @@ -2,9 +2,12 @@ import argparse import json +import logging import os import shutil +logger = logging.getLogger(__name__) + def merge_datasets(input_dirs, output_dir, copy_files=True): config_tasks = [] @@ -29,7 +32,7 @@ def merge_datasets(input_dirs, output_dir, copy_files=True): raise ValueError("All datasets must have different random seeds") # Create output directory - print(f"Output directory: {output_dir}") + logger.info(f"Output directory: {output_dir}") if os.path.exists(output_dir): shutil.rmtree(output_dir) os.makedirs(output_dir) diff --git a/datadreamer/utils/nms.py b/datadreamer/utils/nms.py index 530707c..1373858 100644 --- a/datadreamer/utils/nms.py +++ b/datadreamer/utils/nms.py @@ -4,6 +4,7 @@ # https://github.com/ultralytics/yolov5/blob/master/utils/general.py from __future__ import annotations +import logging import os import time @@ -22,6 +23,8 @@ ) # prevent 
OpenCV from multithreading (incompatible with PyTorch DataLoader) os.environ["NUMEXPR_MAX_THREADS"] = str(min(os.cpu_count(), 8)) # NumExpr max threads +logger = logging.getLogger(__name__) + def xywh2xyxy(x): """Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 @@ -131,7 +134,7 @@ def non_max_suppression( output[img_idx] = x[keep_box_idx] if (time.time() - tik) > time_limit: - print(f"WARNING: NMS cost time exceed the limited {time_limit}s.") + logger.warning(f"WARNING: NMS cost time exceed the limited {time_limit}s.") break # time limit exceeded return output diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py index e5515d5..e447c40 100644 --- a/datadreamer/utils/single_label_cls_converter.py +++ b/datadreamer/utils/single_label_cls_converter.py @@ -1,10 +1,13 @@ from __future__ import annotations +import logging import os import shutil from datadreamer.utils import BaseConverter +logger = logging.getLogger(__name__) + class SingleLabelClsConverter(BaseConverter): """Class for converting a dataset for single-label classification task. 
@@ -64,12 +67,12 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru class_names = data["class_names"] images.remove("class_names") - print(f"Number of images: {len(images)}") + logger.info(f"Number of images: {len(images)}") # Remove images with multiple labels single_label_images = [img for img in images if len(data[img]["labels"]) == 1] - print(f"Number of images with single label: {len(single_label_images)}") + logger.info(f"Number of images with single label: {len(single_label_images)}") # Split the data into training, validation, and test sets train_images, val_images, test_images = BaseConverter.make_splits( From 8d47a8ab249549d58b922dfa856f6d8a24946b24 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 21:31:38 +0000 Subject: [PATCH 03/31] test: add utils tests --- tests/unittests/test_utils.py | 184 ++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 tests/unittests/test_utils.py diff --git a/tests/unittests/test_utils.py b/tests/unittests/test_utils.py new file mode 100644 index 0000000..02faa19 --- /dev/null +++ b/tests/unittests/test_utils.py @@ -0,0 +1,184 @@ +import json +import os +import shutil +import unittest + +from luxonis_ml.data import LuxonisDataset +from PIL import Image +import numpy as np +import torch + +from datadreamer.utils import ( + dataset_utils, + merge_raw_datasets, +) + +def create_sample_image(image_name, image_size=(100, 100), color=(255, 0, 0), save_dir="test_images"): + """ + Create and save a simple image with a solid color. + + Args: + image_name (str): The name of the image file. + image_size (tuple): The size of the image (width, height). + color (tuple): The RGB color of the image. + save_dir (str): The directory to save the images. 
+ """ + # Create the directory if it doesn't exist + os.makedirs(save_dir, exist_ok=True) + + # Create a blank image with the given color + img = Image.new('RGB', image_size, color) + + # Save the image to the specified directory + img.save(os.path.join(save_dir, image_name)) + + +class TestSaveAnnotationsToJson(unittest.TestCase): + + def setUp(self): + # Create a temporary directory for saving images and JSON file + self.test_dir = "test_dir" + self.image_dir = "test_images" + os.makedirs(self.test_dir, exist_ok=True) + os.makedirs(self.image_dir, exist_ok=True) + + # Create sample images + create_sample_image("image1.jpg", save_dir=self.image_dir) + create_sample_image("image2.jpg", save_dir=self.image_dir) + + self.file_name = "annotations.json" + self.image_paths = [ + os.path.join(self.image_dir, "image1.jpg"), + os.path.join(self.image_dir, "image2.jpg"), + ] + self.labels_list = [ + [0], # Labels for image1 + [1], # Labels for image2 + ] + self.labels_list = np.array(self.labels_list) + self.boxes_list = [ + [[10, 10, 50, 50]], # Bounding boxes for image1 + [[20, 20, 40, 40]], # Bounding boxes for image2 + ] + self.boxes_list = np.array(self.boxes_list) + self.class_names = ["class_1", "class_2"] + + def tearDown(self): + # Clean up the test directory after each test + for file in os.listdir(self.test_dir): + os.remove(os.path.join(self.test_dir, file)) + for file in os.listdir(self.image_dir): + os.remove(os.path.join(self.image_dir, file)) + os.rmdir(self.test_dir) + os.rmdir(self.image_dir) + + def test_save_annotations_to_json(self): + # Test saving annotations to JSON + dataset_utils.save_annotations_to_json( + self.image_paths, + self.labels_list, + boxes_list=self.boxes_list, + class_names=self.class_names, + save_dir=self.test_dir, + file_name=self.file_name, + ) + + # Load the saved JSON file and check contents + with open(os.path.join(self.test_dir, self.file_name), "r") as f: + annotations = json.load(f) + + # Check if annotations are correct + 
self.assertEqual(len(annotations), 3) # 2 images + class_names + self.assertIn("image1.jpg", annotations) + self.assertIn("image2.jpg", annotations) + self.assertEqual(annotations["image1.jpg"]["labels"], [0]) + self.assertEqual(annotations["image2.jpg"]["labels"], [1]) + self.assertEqual(annotations["class_names"], self.class_names) + +class TestMergeDatasets(unittest.TestCase): + + def setUp(self): + # Create temporary directories for test datasets + self.input_dir_1 = "input_dir_1" + self.input_dir_2 = "input_dir_2" + self.input_dir_3 = "input_dir_3" + self.output_dir = "output_dir" + os.makedirs(self.input_dir_1, exist_ok=True) + os.makedirs(self.input_dir_2, exist_ok=True) + os.makedirs(self.input_dir_3, exist_ok=True) + + # Create generation_args.json files + self.generation_args_1 = { + "task": "object_detection", + "class_names": ["class_1", "class_2"], + "seed": 1, + } + self.generation_args_2 = { + "task": "object_detection", + "class_names": ["class_1", "class_2"], + "seed": 2, + } + with open(os.path.join(self.input_dir_1, "generation_args.yaml"), "w") as f: + json.dump(self.generation_args_1, f) + with open(os.path.join(self.input_dir_2, "generation_args.yaml"), "w") as f: + json.dump(self.generation_args_2, f) + + # Create annotations.json files + self.annotations_1 = { + "image1.jpg": {"labels": [0]}, + "image2.jpg": {"labels": [1]}, + "class_names": ["class_1", "class_2"], + } + self.annotations_2 = { + "image3.jpg": {"labels": [0]}, + "image4.jpg": {"labels": [1]}, + "class_names": ["class_1", "class_2"], + } + with open(os.path.join(self.input_dir_1, "annotations.json"), "w") as f: + json.dump(self.annotations_1, f) + with open(os.path.join(self.input_dir_2, "annotations.json"), "w") as f: + json.dump(self.annotations_2, f) + + # Create image files + with open(os.path.join(self.input_dir_1, "image1.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_1, "image2.jpg"), "wb") as f: + 
f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_2, "image3.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_2, "image4.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + + def tearDown(self): + # Clean up the test directories after each test + shutil.rmtree(self.input_dir_1) + shutil.rmtree(self.input_dir_2) + if os.path.exists(self.output_dir): + shutil.rmtree(self.output_dir) + + def test_merge_datasets(self): + # Test merging datasets + merge_raw_datasets.merge_datasets([self.input_dir_1, self.input_dir_2], self.output_dir, copy_files=True) + + # Check if output directory is created + self.assertTrue(os.path.exists(self.output_dir)) + + # Check if annotations.json is merged correctly + with open(os.path.join(self.output_dir, "annotations.json"), "r") as f: + merged_annotations = json.load(f) + + print(merged_annotations) + + self.assertEqual(len(merged_annotations), 5) # 4 images in total + class_names + self.assertIn("image1.jpg", merged_annotations) + self.assertIn("image2.jpg", merged_annotations) + self.assertIn("image3.jpg", merged_annotations) + self.assertIn("image4.jpg", merged_annotations) + self.assertEqual(merged_annotations["class_names"], ["class_1", "class_2"]) + + # Check if images are copied correctly + for image_name in ["image1.jpg", "image2.jpg", "image3.jpg", "image4.jpg"]: + self.assertTrue(os.path.exists(os.path.join(self.output_dir, image_name))) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 32d16ad11b65d1cb8887977925c53a73fd84694d Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 21:37:14 +0000 Subject: [PATCH 04/31] style: utils tests formatting --- tests/unittests/test_utils.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/unittests/test_utils.py b/tests/unittests/test_utils.py index 02faa19..bac169b 100644 
--- a/tests/unittests/test_utils.py +++ b/tests/unittests/test_utils.py @@ -3,20 +3,20 @@ import shutil import unittest -from luxonis_ml.data import LuxonisDataset -from PIL import Image import numpy as np -import torch +from PIL import Image from datadreamer.utils import ( dataset_utils, merge_raw_datasets, ) -def create_sample_image(image_name, image_size=(100, 100), color=(255, 0, 0), save_dir="test_images"): - """ - Create and save a simple image with a solid color. - + +def create_sample_image( + image_name, image_size=(100, 100), color=(255, 0, 0), save_dir="test_images" +): + """Create and save a simple image with a solid color. + Args: image_name (str): The name of the image file. image_size (tuple): The size of the image (width, height). @@ -25,16 +25,15 @@ def create_sample_image(image_name, image_size=(100, 100), color=(255, 0, 0), sa """ # Create the directory if it doesn't exist os.makedirs(save_dir, exist_ok=True) - + # Create a blank image with the given color - img = Image.new('RGB', image_size, color) - + img = Image.new("RGB", image_size, color) + # Save the image to the specified directory img.save(os.path.join(save_dir, image_name)) class TestSaveAnnotationsToJson(unittest.TestCase): - def setUp(self): # Create a temporary directory for saving images and JSON file self.test_dir = "test_dir" @@ -95,8 +94,8 @@ def test_save_annotations_to_json(self): self.assertEqual(annotations["image2.jpg"]["labels"], [1]) self.assertEqual(annotations["class_names"], self.class_names) -class TestMergeDatasets(unittest.TestCase): +class TestMergeDatasets(unittest.TestCase): def setUp(self): # Create temporary directories for test datasets self.input_dir_1 = "input_dir_1" @@ -158,7 +157,9 @@ def tearDown(self): def test_merge_datasets(self): # Test merging datasets - merge_raw_datasets.merge_datasets([self.input_dir_1, self.input_dir_2], self.output_dir, copy_files=True) + merge_raw_datasets.merge_datasets( + [self.input_dir_1, self.input_dir_2], self.output_dir, 
copy_files=True + ) # Check if output directory is created self.assertTrue(os.path.exists(self.output_dir)) @@ -180,5 +181,6 @@ def test_merge_datasets(self): for image_name in ["image1.jpg", "image2.jpg", "image3.jpg", "image4.jpg"]: self.assertTrue(os.path.exists(os.path.join(self.output_dir, image_name))) + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 7a3e81eae2d98d33f1e4cc1f8c100fb1a6b9b6ca Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 21:46:21 +0000 Subject: [PATCH 05/31] fix: args extenstion in merge dataset function --- datadreamer/utils/merge_raw_datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py index 77f7359..4cea85f 100644 --- a/datadreamer/utils/merge_raw_datasets.py +++ b/datadreamer/utils/merge_raw_datasets.py @@ -14,7 +14,7 @@ def merge_datasets(input_dirs, output_dir, copy_files=True): config_classes = [] random_seeds = [] for input_dir in input_dirs: - with open(os.path.join(input_dir, "generation_args.json")) as f: + with open(os.path.join(input_dir, "generation_args.yaml")) as f: generation_args = json.load(f) config_tasks.append(generation_args["task"]) config_classes.append(generation_args["class_names"]) @@ -48,12 +48,12 @@ def merge_datasets(input_dirs, output_dir, copy_files=True): if copy_files: shutil.copy( os.path.join(input_dir, "generation_args.yaml"), - os.path.join(output_dir, f"generation_args_{i}.json"), + os.path.join(output_dir, f"generation_args_{i}.yaml"), ) else: shutil.move( os.path.join(input_dir, "generation_args.yaml"), - os.path.join(output_dir, f"generation_args_{i}.json"), + os.path.join(output_dir, f"generation_args_{i}.yaml"), ) # Copy or move images From c5dea80dcbf60088a5213dfa929f5d4b093de322 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 22:12:41 +0000 Subject: [PATCH 06/31] docs: docstrings and return 
types --- .../dataset_annotation/clip_annotator.py | 6 +++--- .../dataset_annotation/owlv2_annotator.py | 4 ++-- datadreamer/dataset_annotation/utils.py | 2 +- .../image_generation/clip_image_tester.py | 4 +++- datadreamer/image_generation/image_generator.py | 4 ++-- .../image_generation/sdxl_image_generator.py | 4 ++-- .../sdxl_lightning_image_generator.py | 4 ++-- .../sdxl_turbo_image_generator.py | 2 +- datadreamer/utils/base_converter.py | 6 +++--- datadreamer/utils/coco_converter.py | 10 +++++++--- datadreamer/utils/convert_dataset.py | 17 ++++++++++++++++- datadreamer/utils/dataset_utils.py | 17 ++++++++++++++++- datadreamer/utils/luxonis_dataset_converter.py | 16 ++++++++++++++-- datadreamer/utils/merge_raw_datasets.py | 13 ++++++++++++- datadreamer/utils/nms.py | 2 +- datadreamer/utils/single_label_cls_converter.py | 6 ++++-- datadreamer/utils/yolo_converter.py | 10 ++++++---- 17 files changed, 95 insertions(+), 32 deletions(-) diff --git a/datadreamer/dataset_annotation/clip_annotator.py b/datadreamer/dataset_annotation/clip_annotator.py index 3c4e6cd..1da779d 100644 --- a/datadreamer/dataset_annotation/clip_annotator.py +++ b/datadreamer/dataset_annotation/clip_annotator.py @@ -50,7 +50,7 @@ def __init__( self.device = device self.model.to(self.device) - def _init_processor(self): + def _init_processor(self) -> CLIPProcessor: """Initializes the CLIP processor. Returns: @@ -60,7 +60,7 @@ def _init_processor(self): return CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14") return CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") - def _init_model(self): + def _init_model(self) -> CLIPModel: """Initializes the CLIP model. Returns: @@ -87,7 +87,7 @@ def annotate_batch( synonym_dict (dict, optional): Dictionary for handling synonyms in labels. Defaults to None. Returns: - List[List[int]]: A list of lists of labels for each image. + List[np.ndarray]: A list of the annotations for each image. 
""" if synonym_dict is not None: objs_syn = set() diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 231558b..3537fdc 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -51,7 +51,7 @@ def __init__( self.device = device self.model.to(self.device) - def _init_model(self): + def _init_model(self) -> Owlv2ForObjectDetection: """Initializes the OWLv2 model for object detection. Returns: @@ -66,7 +66,7 @@ def _init_model(self): "google/owlv2-base-patch16-ensemble" ) - def _init_processor(self): + def _init_processor(self) -> Owlv2Processor: """Initializes the processor for the OWLv2 model. Returns: diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index 942d1a4..e0edc20 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -3,7 +3,7 @@ from torchvision import transforms -def apply_tta(image): +def apply_tta(image) -> list: """Apply test-time augmentation (TTA) to the given image. Args: diff --git a/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py index c1bf3b6..d2aeccb 100644 --- a/datadreamer/image_generation/clip_image_tester.py +++ b/datadreamer/image_generation/clip_image_tester.py @@ -33,7 +33,9 @@ def __init__(self, device: str = "cuda") -> None: self.device = device self.clip.to(self.device) - def test_image(self, image: Image.Image, objects: List[str], conf_threshold=0.05): + def test_image( + self, image: Image.Image, objects: List[str], conf_threshold=0.05 + ) -> tuple: """Tests the generated image against a set of objects using the CLIP model. 
Args: diff --git a/datadreamer/image_generation/image_generator.py b/datadreamer/image_generation/image_generator.py index 3e2aff7..bfbc53d 100644 --- a/datadreamer/image_generation/image_generator.py +++ b/datadreamer/image_generation/image_generator.py @@ -64,7 +64,7 @@ def __init__( self.set_seed(seed) @staticmethod - def set_seed(seed: int): + def set_seed(seed: int) -> None: """Sets the seed for random number generators in Python and PyTorch. Args: @@ -78,7 +78,7 @@ def generate_images( self, prompts: Union[str, List[str]], prompt_objects: Optional[List[List[str]]] = None, - ): + ) -> List[Image.Image]: """Generates images based on the provided prompts and optional object prompts. Args: diff --git a/datadreamer/image_generation/sdxl_image_generator.py b/datadreamer/image_generation/sdxl_image_generator.py index 6dc583e..7ccd908 100644 --- a/datadreamer/image_generation/sdxl_image_generator.py +++ b/datadreamer/image_generation/sdxl_image_generator.py @@ -36,7 +36,7 @@ def __init__(self, *args, **kwargs): self.base, self.refiner = self._init_gen_model() self.base_processor, self.refiner_processor = self._init_processor() - def _init_gen_model(self): + def _init_gen_model(self) -> tuple: """Initializes the base and refiner models of Stable Diffusion. Returns: @@ -80,7 +80,7 @@ def _init_gen_model(self): return base, refiner - def _init_processor(self): + def _init_processor(self) -> tuple: """Initializes the processors for the base and refiner models. 
Returns: diff --git a/datadreamer/image_generation/sdxl_lightning_image_generator.py b/datadreamer/image_generation/sdxl_lightning_image_generator.py index cc43dfe..f4520e4 100644 --- a/datadreamer/image_generation/sdxl_lightning_image_generator.py +++ b/datadreamer/image_generation/sdxl_lightning_image_generator.py @@ -40,7 +40,7 @@ def __init__(self, *args, **kwargs): self.pipe = self._init_gen_model() self.compel = self._init_compel() - def _init_gen_model(self): + def _init_gen_model(self) -> StableDiffusionXLPipeline: """Initializes the Stable Diffusion Lightning model for image generation. Returns: @@ -75,7 +75,7 @@ def _init_gen_model(self): return pipe - def _init_compel(self): + def _init_compel(self) -> Compel: """Initializes the Compel model for text prompt weighting. Returns: diff --git a/datadreamer/image_generation/sdxl_turbo_image_generator.py b/datadreamer/image_generation/sdxl_turbo_image_generator.py index 54d9dd7..abd20a0 100644 --- a/datadreamer/image_generation/sdxl_turbo_image_generator.py +++ b/datadreamer/image_generation/sdxl_turbo_image_generator.py @@ -31,7 +31,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.base = self._init_gen_model() - def _init_gen_model(self): + def _init_gen_model(self) -> AutoPipelineForText2Image: """Initializes the Stable Diffusion Turbo model for image generation. Returns: diff --git a/datadreamer/utils/base_converter.py b/datadreamer/utils/base_converter.py index 3d97199..2de019d 100644 --- a/datadreamer/utils/base_converter.py +++ b/datadreamer/utils/base_converter.py @@ -13,7 +13,7 @@ def __init__(self, seed=42): np.random.seed(seed) @abstractmethod - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into another format. 
Args: @@ -28,7 +28,7 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): pass @staticmethod - def read_annotations(annotation_path): + def read_annotations(annotation_path) -> dict: """Reads annotations from a JSON file located at the specified path. Args: @@ -42,7 +42,7 @@ def read_annotations(annotation_path): return data @staticmethod - def make_splits(images, split_ratios, shuffle=True): + def make_splits(images, split_ratios, shuffle=True) -> tuple: """Splits the list of images into training, validation, and test sets. Args: diff --git a/datadreamer/utils/coco_converter.py b/datadreamer/utils/coco_converter.py index ba02d97..9e94469 100644 --- a/datadreamer/utils/coco_converter.py +++ b/datadreamer/utils/coco_converter.py @@ -31,7 +31,7 @@ class COCOConverter(BaseConverter): def __init__(self, seed=42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into a COCO format. Args: @@ -46,7 +46,9 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def process_data( + self, data, image_dir, output_dir, split_ratios, copy_files=True + ) -> None: """Processes the data by dividing it into training and validation sets, and saves the images and labels in COCO format. @@ -126,7 +128,9 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru dataset_output_dir, images_info, annotations, data["class_names"] ) - def save_labels(self, dataset_output_dir, images_info, annotations, class_names): + def save_labels( + self, dataset_output_dir, images_info, annotations, class_names + ) -> None: """Saves the labels to a JSON file. 
Args: diff --git a/datadreamer/utils/convert_dataset.py b/datadreamer/utils/convert_dataset.py index 21f1159..874878b 100644 --- a/datadreamer/utils/convert_dataset.py +++ b/datadreamer/utils/convert_dataset.py @@ -19,7 +19,22 @@ def convert_dataset( dataset_name=None, copy_files=True, seed=42, -): +) -> None: + """Converts a dataset from one format to another. + + Args: + input_dir (str): Directory containing the images and annotations. + output_dir (str): Directory where the processed dataset will be saved. + dataset_format (str): Format of the dataset. Can be 'yolo', 'coco', 'luxonis-dataset', or 'cls-single'. + split_ratios (list): List of ratios for train, val, and test splits. + dataset_plugin (str, optional): Plugin for Luxonis dataset. Defaults to None. + dataset_name (str, optional): Name of the Luxonis dataset. Defaults to None. + copy_files (bool, optional): Whether to copy the files to the output directory. Defaults to True. + seed (int, optional): Random seed. Defaults to 42. + + No return value. + """ + if dataset_format == "yolo": converter = YOLOConverter(seed=seed) elif dataset_format == "coco": diff --git a/datadreamer/utils/dataset_utils.py b/datadreamer/utils/dataset_utils.py index a396ae0..33fe003 100644 --- a/datadreamer/utils/dataset_utils.py +++ b/datadreamer/utils/dataset_utils.py @@ -9,7 +9,22 @@ def save_annotations_to_json( class_names=None, save_dir=None, file_name="annotations.json", -): +) -> None: + """Saves annotations to a JSON file. + + Args: + image_paths (list): List of image paths. + labels_list (list): List of labels. + boxes_list (list, optional): List of bounding boxes. Defaults to None. + class_names (list, optional): List of class names. Defaults to None. + save_dir (str, optional): Directory to save the JSON file. Defaults to None. + file_name (str, optional): Name of the JSON file. Defaults to 'annotations.json'. + + No return value. 
+ """ + if save_dir is None: + save_dir = os.getcwd() + annotations = {} for i in range(len(image_paths)): # for image_path, bboxes, labels in zip(image_paths, boxes_list, labels_list): diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index 9b55f79..d78acf8 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -20,7 +20,7 @@ def __init__(self, dataset_plugin=None, dataset_name=None, seed=42): self.dataset_plugin = dataset_plugin self.dataset_name = dataset_name - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into a LuxonisDataset format. Args: @@ -35,7 +35,19 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios) - def process_data(self, data, dataset_dir, output_dir, split_ratios): + def process_data(self, data, dataset_dir, output_dir, split_ratios) -> None: + """Processes the data into LuxonisDataset format. + + Args: + - data (dict): The data to process. + - dataset_dir (str): The directory where the source dataset is located. + - output_dir (str): The directory where the processed dataset should be saved. + - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + + No return value. 
+ """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) class_names = data["class_names"] image_paths = list(data.keys()) image_paths.remove("class_names") diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py index 4cea85f..e2639fb 100644 --- a/datadreamer/utils/merge_raw_datasets.py +++ b/datadreamer/utils/merge_raw_datasets.py @@ -9,7 +9,18 @@ logger = logging.getLogger(__name__) -def merge_datasets(input_dirs, output_dir, copy_files=True): +def merge_datasets(input_dirs, output_dir, copy_files=True) -> None: + """Merges multiple raw datasets into a single dataset. + + Args: + input_dirs (List[str]): A list of input directories containing raw datasets. + output_dir (str): The output directory where the merged dataset will be saved. + copy_files (bool, optional): Whether to copy the files from the input directories + to the output directory. Defaults to True. + + No return value. + """ + # Check if all input directories exist config_tasks = [] config_classes = [] random_seeds = [] diff --git a/datadreamer/utils/nms.py b/datadreamer/utils/nms.py index 1373858..f277ab2 100644 --- a/datadreamer/utils/nms.py +++ b/datadreamer/utils/nms.py @@ -45,7 +45,7 @@ def non_max_suppression( agnostic=False, multi_label=False, max_det=300, -): +) -> list: """Runs Non-Maximum Suppression (NMS) on inference results. 
This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775 Args: diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py index e447c40..523373d 100644 --- a/datadreamer/utils/single_label_cls_converter.py +++ b/datadreamer/utils/single_label_cls_converter.py @@ -35,7 +35,7 @@ class SingleLabelClsConverter(BaseConverter): def __init__(self, seed=42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into a format suitable for single-label classification. Args: @@ -50,7 +50,9 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def process_data( + self, data, image_dir, output_dir, split_ratios, copy_files=True + ) -> None: """Processes the data by removing images with multiple labels, then dividing it into training and validation sets, and saves the images with single labels. diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py index 36452da..dd00a4a 100644 --- a/datadreamer/utils/yolo_converter.py +++ b/datadreamer/utils/yolo_converter.py @@ -32,7 +32,7 @@ class YOLOConverter(BaseConverter): def __init__(self, seed=42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into a format suitable for training with YOLO, including creating training and validation splits. 
@@ -48,7 +48,7 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) - def convert_to_yolo_format(self, box, image_width, image_height): + def convert_to_yolo_format(self, box, image_width, image_height) -> list: """Converts bounding box coordinates to YOLO format. Args: @@ -65,7 +65,9 @@ def convert_to_yolo_format(self, box, image_width, image_height): height = (box[3] - box[1]) / image_height return [x_center, y_center, width, height] - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def process_data( + self, data, image_dir, output_dir, split_ratios, copy_files=True + ) -> None: """Processes the data by dividing it into training and validation sets, and saves the images and labels in YOLO format. @@ -131,7 +133,7 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru self.create_data_yaml(output_dir, data["class_names"]) - def create_data_yaml(self, root_dir, class_names): + def create_data_yaml(self, root_dir, class_names) -> None: """Creates a YAML file for dataset configuration, specifying paths and class names. 
From 02c2d14d042815260a68b6af0d428eb0e77aef64 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 24 Sep 2024 22:31:57 +0000 Subject: [PATCH 07/31] [Automated] Updated coverage badge --- media/coverage_badge.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index b4a82e6..dd6df12 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -9,13 +9,13 @@ - + coverage coverage - 56% - 56% + 61% + 61% From 8625376e0b525d16c114caf7bae411f2cbcb3567 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 22:57:40 +0000 Subject: [PATCH 08/31] fix: remove axes in bbox visualization --- datadreamer/pipelines/generate_dataset_from_scratch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 03e3a7d..d3ee3bf 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -623,11 +623,13 @@ def read_image_batch(image_batch, batch_num, batch_size): labels_list.append(np.array(labels)) + plt.axis("off") plt.savefig( os.path.join( bbox_dir, f"bbox_{i * args.batch_size_annotation + j}.jpg" ) ) + plt.close() # Save annotations as JSON files From 73686c05859734f24ec3a27a5cce62c3c24b92de Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Tue, 24 Sep 2024 23:27:57 +0000 Subject: [PATCH 09/31] tests: improve image generation tests --- tests/unittests/test_image_generation.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/unittests/test_image_generation.py b/tests/unittests/test_image_generation.py index f91fcc1..506e42c 100644 --- a/tests/unittests/test_image_generation.py +++ b/tests/unittests/test_image_generation.py @@ -25,6 +25,8 @@ def _check_clip_image_tester(device: str): url = "https://ultralytics.com/images/bus.jpg" im = 
Image.open(requests.get(url, stream=True).raw) tester = ClipImageTester(device=device) + # Check that the tester is not None + assert tester is not None passed, probs, num_passed = tester.test_image(im, ["bus"]) # Check that the image passed the test assert passed is True @@ -34,7 +36,15 @@ def _check_clip_image_tester(device: str): assert probs.shape == (1, 1) # Check that the probability is not zero assert probs[0, 0] > 0 - # Release the tester + passed_list, probs_list, num_passed_list = tester.test_images_batch([im], [["bus"]]) + # Check that the image passed the test + assert passed_list[0] is True + # Check that the number of objects passed is correct + assert num_passed_list[0] == 1 + # Check that the probability has correct shape + assert len(probs_list) == 1 + # Check that the probability is not zero + assert probs_list[0][0] > 0 tester.release(empty_cuda_cache=True if device != "cpu" else False) @@ -65,6 +75,8 @@ def _check_image_generator( device: str, ): image_generator = image_generator_class(device=device) + # Check that the image generator is not None + assert image_generator is not None # Generate images and check each of them for generated_images_batch in image_generator.generate_images( ["A photo of a cat, dog"], [["cat", "dog"]] @@ -72,6 +84,15 @@ def _check_image_generator( generated_image = generated_images_batch[0] assert generated_image is not None assert isinstance(generated_image, Image.Image) + + images = image_generator.generate_images_batch( + ["A photo of a cat, dog"], + "blurry, bad quality", + ) + assert len(images) == 1 + assert images[0] is not None + assert isinstance(images[0], Image.Image) + # Release the generator image_generator.release(empty_cuda_cache=True if device != "cpu" else False) From 4882a50b412c278a093a60027a2ffcd205b3a95c Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 24 Sep 2024 23:41:29 +0000 Subject: [PATCH 10/31] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index dd6df12..2fad913 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 61% - 61% + 62% + 62% From 4efa3162519eb7da2f62a53441129255627bbf79 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 13:57:07 +0000 Subject: [PATCH 11/31] docs: fix docstrings formatting --- datadreamer/utils/base_converter.py | 25 ++++++++-------- datadreamer/utils/coco_converter.py | 27 ++++++++--------- .../utils/luxonis_dataset_converter.py | 16 +++++----- .../utils/single_label_cls_converter.py | 18 +++++------ datadreamer/utils/yolo_converter.py | 30 +++++++++---------- 5 files changed, 57 insertions(+), 59 deletions(-) diff --git a/datadreamer/utils/base_converter.py b/datadreamer/utils/base_converter.py index 2de019d..60a3f51 100644 --- a/datadreamer/utils/base_converter.py +++ b/datadreamer/utils/base_converter.py @@ -17,11 +17,10 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non """Converts a dataset into another format. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. - + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. 
""" @@ -32,10 +31,10 @@ def read_annotations(annotation_path) -> dict: """Reads annotations from a JSON file located at the specified path. Args: - - annotation_path (str): The path to the JSON file containing annotations. + annotation_path (str): The path to the JSON file containing annotations. Returns: - - dict: A dictionary containing the data loaded from the JSON file. + dict: A dictionary containing the data loaded from the JSON file. """ with open(annotation_path) as f: data = json.load(f) @@ -46,14 +45,14 @@ def make_splits(images, split_ratios, shuffle=True) -> tuple: """Splits the list of images into training, validation, and test sets. Args: - - images (list of str): A list of image paths. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - shuffle (bool, optional): Whether to shuffle the list of images. Defaults to True. + images (list of str): A list of image paths. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + shuffle (bool, optional): Whether to shuffle the list of images. Defaults to True. Returns: - - list of str: A list of image paths for the training set. - - list of str: A list of image paths for the validation set. - - list of str: A list of image paths for the test set. + list of str: A list of image paths for the training set. + list of str: A list of image paths for the validation set. + list of str: A list of image paths for the test set. """ if shuffle: np.random.shuffle(images) diff --git a/datadreamer/utils/coco_converter.py b/datadreamer/utils/coco_converter.py index 9e94469..bcd3546 100644 --- a/datadreamer/utils/coco_converter.py +++ b/datadreamer/utils/coco_converter.py @@ -35,10 +35,10 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non """Converts a dataset into a COCO format. Args: - - dataset_dir (str): The directory where the source dataset is located. 
- - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -53,12 +53,11 @@ def process_data( saves the images and labels in COCO format. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. - + data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (float): The ratio to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -134,10 +133,10 @@ def save_labels( """Saves the labels to a JSON file. Args: - - dataset_output_dir (str): The directory where the labels should be saved. - - images_info (list of dict): A list of dictionaries containing image information. 
- - annotations (list of dict): A list of dictionaries containing annotation information. - - class_names (list of str): A list of class names. + dataset_output_dir (str): The directory where the labels should be saved. + images_info (list of dict): A list of dictionaries containing image information. + annotations (list of dict): A list of dictionaries containing annotation information. + class_names (list of str): A list of class names. No return value. """ diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index d78acf8..07b157e 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -24,10 +24,10 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non """Converts a dataset into a LuxonisDataset format. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -39,10 +39,10 @@ def process_data(self, data, dataset_dir, output_dir, split_ratios) -> None: """Processes the data into LuxonisDataset format. Args: - - data (dict): The data to process. - - dataset_dir (str): The directory where the source dataset is located. 
- - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + data (dict): The data to process. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. No return value. """ diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py index 523373d..8d56ad1 100644 --- a/datadreamer/utils/single_label_cls_converter.py +++ b/datadreamer/utils/single_label_cls_converter.py @@ -39,10 +39,10 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non """Converts a dataset into a format suitable for single-label classification. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -57,11 +57,11 @@ def process_data( into training and validation sets, and saves the images with single labels. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. 
- - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (float): The ratio to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py index dd00a4a..08d3e85 100644 --- a/datadreamer/utils/yolo_converter.py +++ b/datadreamer/utils/yolo_converter.py @@ -37,10 +37,10 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non creating training and validation splits. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. 
""" @@ -52,12 +52,12 @@ def convert_to_yolo_format(self, box, image_width, image_height) -> list: """Converts bounding box coordinates to YOLO format. Args: - - box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. - - image_width (int): The width of the image. - - image_height (int): The height of the image. + box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. + image_width (int): The width of the image. + image_height (int): The height of the image. Returns: - - list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. + list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. """ x_center = (box[0] + box[2]) / 2 / image_width y_center = (box[1] + box[3]) / 2 / image_height @@ -72,11 +72,11 @@ def process_data( saves the images and labels in YOLO format. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (float): The ratio to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. @@ -138,8 +138,8 @@ def create_data_yaml(self, root_dir, class_names) -> None: names. 
Args: - - root_dir (str): The root directory where the dataset is located. - - class_names (list of str): A list of class names. + root_dir (str): The root directory where the dataset is located. + class_names (list of str): A list of class names. No return value. """ From 9a70a976674bc5cd428e9c92541dc6bc5fb8464d Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 15:00:47 +0000 Subject: [PATCH 12/31] fix: type hints --- .../dataset_annotation/clip_annotator.py | 4 ++-- .../dataset_annotation/owlv2_annotator.py | 14 ++++++------ datadreamer/dataset_annotation/utils.py | 4 +++- .../image_generation/clip_image_tester.py | 12 +++++----- .../image_generation/sdxl_image_generator.py | 6 ++--- .../prompt_generation/lm_prompt_generator.py | 4 ++-- .../prompt_generation/lm_synonym_generator.py | 4 ++-- .../prompt_generation/prompt_generator.py | 2 +- .../prompt_generation/synonym_generator.py | 6 ++--- .../tinyllama_lm_prompt_generator.py | 4 ++-- datadreamer/utils/base_converter.py | 5 +++-- .../utils/luxonis_dataset_converter.py | 17 +++++++++++--- datadreamer/utils/merge_raw_datasets.py | 5 ++++- datadreamer/utils/nms.py | 3 ++- .../utils/single_label_cls_converter.py | 18 ++++++++++++--- datadreamer/utils/yolo_converter.py | 22 +++++++++++++++---- 16 files changed, 87 insertions(+), 43 deletions(-) diff --git a/datadreamer/dataset_annotation/clip_annotator.py b/datadreamer/dataset_annotation/clip_annotator.py index 1da779d..a39d1c6 100644 --- a/datadreamer/dataset_annotation/clip_annotator.py +++ b/datadreamer/dataset_annotation/clip_annotator.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import List +from typing import Dict, List import numpy as np import PIL @@ -76,7 +76,7 @@ def annotate_batch( images: List[PIL.Image.Image], objects: List[str], conf_threshold: float = 0.1, - synonym_dict: dict[str, List[str]] | None = None, + synonym_dict: Dict[str, List[str]] | None = None, ) -> List[np.ndarray]: """Annotates 
images using the OWLv2 model. diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 3537fdc..89d4023 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import List, Tuple +from typing import Dict, List, Tuple import numpy as np import PIL @@ -85,7 +85,7 @@ def _generate_annotations( images: List[PIL.Image.Image], prompts: List[str], conf_threshold: float = 0.1, - ) -> List[dict[str, torch.Tensor]]: + ) -> List[Dict[str, torch.Tensor]]: """Generates annotations for the given images and prompts. Args: @@ -94,7 +94,7 @@ def _generate_annotations( conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. Returns: - dict: A dictionary containing the annotations for the images. + List[Dict[str, torch.Tensor]]: The annotations for the given images and prompts. """ n = len(images) batched_prompts = [prompts] * n @@ -119,11 +119,11 @@ def _generate_annotations( def _get_annotations( self, - pred: dict[str, torch.Tensor], + pred: Dict[str, torch.Tensor], use_tta: bool, img_dim: int, - synonym_dict: dict[str, List[str]] | None, - synonym_dict_rev: dict[int, int] | None, + synonym_dict: Dict[str, List[str]] | None, + synonym_dict_rev: Dict[int, int] | None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """Extracts the annotations from the predictions. @@ -161,7 +161,7 @@ def annotate_batch( conf_threshold: float = 0.1, iou_threshold: float = 0.2, use_tta: bool = False, - synonym_dict: dict[str, List[str]] | None = None, + synonym_dict: Dict[str, List[str]] | None = None, ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: """Annotates images using the OWLv2 model. 
diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index e0edc20..bfb13b7 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -1,9 +1,11 @@ from __future__ import annotations +from typing import List + from torchvision import transforms -def apply_tta(image) -> list: +def apply_tta(image) -> List[transforms.Compose]: """Apply test-time augmentation (TTA) to the given image. Args: diff --git a/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py index d2aeccb..8f86a88 100644 --- a/datadreamer/image_generation/clip_image_tester.py +++ b/datadreamer/image_generation/clip_image_tester.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import List +from typing import List, Tuple import torch from PIL import Image @@ -34,8 +34,8 @@ def __init__(self, device: str = "cuda") -> None: self.clip.to(self.device) def test_image( - self, image: Image.Image, objects: List[str], conf_threshold=0.05 - ) -> tuple: + self, image: Image.Image, objects: List[str], conf_threshold: float = 0.05 + ) -> Tuple[bool, torch.Tensor, int]: """Tests the generated image against a set of objects using the CLIP model. Args: @@ -67,7 +67,7 @@ def test_images_batch( images: List[Image.Image], objects: List[List[str]], conf_threshold=0.05, - ) -> List[tuple]: + ) -> Tuple[List[bool], List[torch.Tensor], List[int]]: """Tests the generated images against a set of objects using the CLIP model. Args: @@ -76,8 +76,8 @@ def test_images_batch( conf_threshold (float, optional): Confidence threshold for considering an object as present. Defaults to 0.05. Returns: - List[tuple]: A list of tuples containing a boolean indicating if the image passes the test, - the probabilities of the objects, and the number of objects that passed the test. 
+ Tuple[List[bool], List[torch.Tensor], List[int]]: A tuple containing a list of booleans indicating if the images pass the test, + a list of probabilities of the objects, and a list of the number of objects that passed the test. """ # Transform the inputs for the CLIP model objects_array = [] diff --git a/datadreamer/image_generation/sdxl_image_generator.py b/datadreamer/image_generation/sdxl_image_generator.py index 7ccd908..3c090de 100644 --- a/datadreamer/image_generation/sdxl_image_generator.py +++ b/datadreamer/image_generation/sdxl_image_generator.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from typing import List, Optional +from typing import List, Optional, Tuple import torch from compel import Compel, ReturnedEmbeddingsType @@ -36,7 +36,7 @@ def __init__(self, *args, **kwargs): self.base, self.refiner = self._init_gen_model() self.base_processor, self.refiner_processor = self._init_processor() - def _init_gen_model(self) -> tuple: + def _init_gen_model(self) -> Tuple[DiffusionPipeline, DiffusionPipeline]: """Initializes the base and refiner models of Stable Diffusion. Returns: @@ -80,7 +80,7 @@ def _init_gen_model(self) -> tuple: return base, refiner - def _init_processor(self) -> tuple: + def _init_processor(self) -> Tuple[Compel, Compel]: """Initializes the processors for the base and refiner models. 
Returns: diff --git a/datadreamer/prompt_generation/lm_prompt_generator.py b/datadreamer/prompt_generation/lm_prompt_generator.py index adbd44c..8a3e6e1 100644 --- a/datadreamer/prompt_generation/lm_prompt_generator.py +++ b/datadreamer/prompt_generation/lm_prompt_generator.py @@ -3,7 +3,7 @@ import logging import random import re -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Tuple import torch from tqdm import tqdm @@ -65,7 +65,7 @@ def __init__( ) self.model, self.tokenizer, self.pipeline = self._init_lang_model() - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. Returns: diff --git a/datadreamer/prompt_generation/lm_synonym_generator.py b/datadreamer/prompt_generation/lm_synonym_generator.py index a971655..850ccfb 100644 --- a/datadreamer/prompt_generation/lm_synonym_generator.py +++ b/datadreamer/prompt_generation/lm_synonym_generator.py @@ -2,7 +2,7 @@ import logging import re -from typing import List, Optional +from typing import List, Optional, Tuple import torch from transformers import ( @@ -45,7 +45,7 @@ def __init__( super().__init__(synonyms_number, seed, device) self.model, self.tokenizer, self.pipeline = self._init_lang_model() - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. 
Returns: diff --git a/datadreamer/prompt_generation/prompt_generator.py b/datadreamer/prompt_generation/prompt_generator.py index 825243c..50662ac 100644 --- a/datadreamer/prompt_generation/prompt_generator.py +++ b/datadreamer/prompt_generation/prompt_generator.py @@ -49,7 +49,7 @@ def __init__( self.quantization = quantization if quantization is not None else "none" @staticmethod - def set_seed(seed: int): + def set_seed(seed: int) -> None: """Sets the random seed for consistent prompt generation. Args: diff --git a/datadreamer/prompt_generation/synonym_generator.py b/datadreamer/prompt_generation/synonym_generator.py index bb4240e..b5d338f 100644 --- a/datadreamer/prompt_generation/synonym_generator.py +++ b/datadreamer/prompt_generation/synonym_generator.py @@ -3,7 +3,7 @@ import json import logging from abc import ABC, abstractmethod -from typing import List, Optional +from typing import Dict, List, Optional from tqdm import tqdm @@ -41,7 +41,7 @@ def __init__( self.seed = seed self.device = device - def generate_synonyms_for_list(self, words: List[str]) -> dict: + def generate_synonyms_for_list(self, words: List[str]) -> Dict: """Generates synonyms for a list of words and returns them in a dictionary. Args: @@ -56,7 +56,7 @@ def generate_synonyms_for_list(self, words: List[str]) -> dict: synonyms_dict[word] = synonyms return synonyms_dict - def save_synonyms(self, synonyms, save_path: str) -> None: + def save_synonyms(self, synonyms: Dict, save_path: str) -> None: """Saves the generated synonyms to a JSON file. 
Args: diff --git a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py index ed5fdcf..9e939a7 100644 --- a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py +++ b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py @@ -2,7 +2,7 @@ import logging import re -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Tuple import torch from transformers import AutoModelForCausalLM, AutoTokenizer, Pipeline, pipeline @@ -50,7 +50,7 @@ def __init__( quantization, ) - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. Returns: diff --git a/datadreamer/utils/base_converter.py b/datadreamer/utils/base_converter.py index 60a3f51..5c8243e 100644 --- a/datadreamer/utils/base_converter.py +++ b/datadreamer/utils/base_converter.py @@ -2,6 +2,7 @@ import json from abc import ABC, abstractmethod +from typing import Dict, List, Tuple import numpy as np @@ -27,7 +28,7 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non pass @staticmethod - def read_annotations(annotation_path) -> dict: + def read_annotations(annotation_path) -> Dict: """Reads annotations from a JSON file located at the specified path. Args: @@ -41,7 +42,7 @@ def read_annotations(annotation_path) -> dict: return data @staticmethod - def make_splits(images, split_ratios, shuffle=True) -> tuple: + def make_splits(images, split_ratios, shuffle=True) -> Tuple[List, List, List]: """Splits the list of images into training, validation, and test sets. 
Args: diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index 07b157e..9a2e6f9 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -2,6 +2,7 @@ import logging import os +from typing import Dict, List from luxonis_ml.data import DATASETS_REGISTRY, LuxonisDataset from luxonis_ml.data.utils.enums import BucketStorage @@ -15,12 +16,20 @@ class LuxonisDatasetConverter(BaseConverter): """Class for converting a dataset to LuxonisDataset format.""" - def __init__(self, dataset_plugin=None, dataset_name=None, seed=42): + def __init__( + self, dataset_plugin: str = None, dataset_name: str = None, seed: int = 42 + ): super().__init__(seed) self.dataset_plugin = dataset_plugin self.dataset_name = dataset_name - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, + ) -> None: """Converts a dataset into a LuxonisDataset format. Args: @@ -35,7 +44,9 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios) - def process_data(self, data, dataset_dir, output_dir, split_ratios) -> None: + def process_data( + self, data: Dict, dataset_dir: str, output_dir: str, split_ratios: List[float] + ) -> None: """Processes the data into LuxonisDataset format. 
Args: diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py index e2639fb..c6eb64e 100644 --- a/datadreamer/utils/merge_raw_datasets.py +++ b/datadreamer/utils/merge_raw_datasets.py @@ -5,11 +5,14 @@ import logging import os import shutil +from typing import List logger = logging.getLogger(__name__) -def merge_datasets(input_dirs, output_dir, copy_files=True) -> None: +def merge_datasets( + input_dirs: List[str], output_dir: str, copy_files: bool = True +) -> None: """Merges multiple raw datasets into a single dataset. Args: diff --git a/datadreamer/utils/nms.py b/datadreamer/utils/nms.py index f277ab2..f0f29e4 100644 --- a/datadreamer/utils/nms.py +++ b/datadreamer/utils/nms.py @@ -7,6 +7,7 @@ import logging import os import time +from typing import List import cv2 import numpy as np @@ -45,7 +46,7 @@ def non_max_suppression( agnostic=False, multi_label=False, max_det=300, -) -> list: +) -> List[np.ndarray]: """Runs Non-Maximum Suppression (NMS) on inference results. 
This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775 Args: diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py index 8d56ad1..c24bec7 100644 --- a/datadreamer/utils/single_label_cls_converter.py +++ b/datadreamer/utils/single_label_cls_converter.py @@ -3,6 +3,7 @@ import logging import os import shutil +from typing import Dict, List from datadreamer.utils import BaseConverter @@ -32,10 +33,16 @@ class SingleLabelClsConverter(BaseConverter): │ ├── class_2 """ - def __init__(self, seed=42): + def __init__(self, seed: int = 42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, + ) -> None: """Converts a dataset into a format suitable for single-label classification. Args: @@ -51,7 +58,12 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) def process_data( - self, data, image_dir, output_dir, split_ratios, copy_files=True + self, + data: Dict, + image_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, ) -> None: """Processes the data by removing images with multiple labels, then dividing it into training and validation sets, and saves the images with single labels. 
diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py index 08d3e85..715e429 100644 --- a/datadreamer/utils/yolo_converter.py +++ b/datadreamer/utils/yolo_converter.py @@ -2,6 +2,7 @@ import os import shutil +from typing import Dict, List from PIL import Image @@ -32,7 +33,13 @@ class YOLOConverter(BaseConverter): def __init__(self, seed=42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, + ): """Converts a dataset into a format suitable for training with YOLO, including creating training and validation splits. @@ -48,7 +55,9 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> Non data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) - def convert_to_yolo_format(self, box, image_width, image_height) -> list: + def convert_to_yolo_format( + self, box: List[float], image_width: int, image_height: int + ) -> List[float]: """Converts bounding box coordinates to YOLO format. Args: @@ -66,7 +75,12 @@ def convert_to_yolo_format(self, box, image_width, image_height) -> list: return [x_center, y_center, width, height] def process_data( - self, data, image_dir, output_dir, split_ratios, copy_files=True + self, + data: Dict, + image_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, ) -> None: """Processes the data by dividing it into training and validation sets, and saves the images and labels in YOLO format. @@ -133,7 +147,7 @@ def process_data( self.create_data_yaml(output_dir, data["class_names"]) - def create_data_yaml(self, root_dir, class_names) -> None: + def create_data_yaml(self, root_dir: str, class_names: List[str]) -> None: """Creates a YAML file for dataset configuration, specifying paths and class names. 
From 46e944b5803a517c9a91133ef310dc4115db1e07 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 15:37:48 +0000 Subject: [PATCH 13/31] tests: replace default ubuntu runner with buildjet runner --- .github/workflows/tests.yaml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0b1aaf7..31a711d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] + os: [buildjet-4vcpu-ubuntu-2204, windows-latest, macOS-latest] version: ['3.10', '3.11'] runs-on: ${{ matrix.os }} @@ -31,46 +31,43 @@ jobs: cache: pip - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' run: | sudo apt update sudo apt install -y pandoc pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Install dependencies [Windows] if: matrix.os == 'windows-latest' run: | pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Install dependencies [macOS] if: matrix.os == 'macOS-latest' run: | pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] - if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' + if: matrix.os != 'buildjet-4vcpu-ubuntu-2204' || matrix.version != '3.10' run: pytest tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' run: coverage-badge -o 
media/coverage_badge.svg -f - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' uses: orgoro/coverage@v3.1 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' run: | git config --global user.name 'GitHub Actions' git config --global user.email 'actions@github.com' @@ -78,9 +75,8 @@ jobs: git add media/coverage_badge.svg git commit -m "[Automated] Updated coverage badge" } - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' uses: ad-m/github-push-action@master with: branch: ${{ github.head_ref }} From 2639a9bb9cad26108704884039ee2485b8ea5ca7 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 16:08:14 +0000 Subject: [PATCH 14/31] fix: type hint --- datadreamer/image_generation/clip_image_tester.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py index 8f86a88..8147533 100644 --- a/datadreamer/image_generation/clip_image_tester.py +++ b/datadreamer/image_generation/clip_image_tester.py @@ -66,7 +66,7 @@ def test_images_batch( self, images: List[Image.Image], objects: List[List[str]], - conf_threshold=0.05, + conf_threshold: float = 0.05, ) -> Tuple[List[bool], List[torch.Tensor], List[int]]: """Tests the generated images against a set of objects using the CLIP model. 
From 261e392f5acdf9e127c25f41e76b7e90a0978e88 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 16:26:50 +0000 Subject: [PATCH 15/31] test: modify memory computation --- tests/integration/test_pipeline.py | 4 ++-- tests/unittests/test_annotators.py | 2 +- tests/unittests/test_image_generation.py | 4 ++-- tests/unittests/test_prompt_generation.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 293d3a7..f54244e 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -8,9 +8,9 @@ import torch # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1024**3) +total_memory = psutil.virtual_memory().total / (1000**3) # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1024**3) +total_disk_space = psutil.disk_usage("/").total / (1000**3) def _check_detection_pipeline(cmd: str, target_folder: str): diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py index 698ed3d..f69f83e 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/unittests/test_annotators.py @@ -11,7 +11,7 @@ from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1024**3) +total_disk_space = psutil.disk_usage("/").total / (1000**3) def _check_owlv2_annotator(device: str, size: str = "base"): diff --git a/tests/unittests/test_image_generation.py b/tests/unittests/test_image_generation.py index 506e42c..4a73d0b 100644 --- a/tests/unittests/test_image_generation.py +++ b/tests/unittests/test_image_generation.py @@ -16,9 +16,9 @@ from datadreamer.image_generation.clip_image_tester import ClipImageTester # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1024**3) +total_memory = psutil.virtual_memory().total / (1000**3) # Get the total disk 
space in GB -total_disk_space = psutil.disk_usage("/").total / (1024**3) +total_disk_space = psutil.disk_usage("/").total / (1000**3) def _check_clip_image_tester(device: str): diff --git a/tests/unittests/test_prompt_generation.py b/tests/unittests/test_prompt_generation.py index e77472d..4b906ac 100644 --- a/tests/unittests/test_prompt_generation.py +++ b/tests/unittests/test_prompt_generation.py @@ -15,9 +15,9 @@ ) # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1024**3) +total_memory = psutil.virtual_memory().total / (1000**3) # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1024**3) +total_disk_space = psutil.disk_usage("/").total / (1000**3) def test_simple_prompt_generator(): From 9e812e07210f799dff0e8d4dfee39b890aa58f71 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 16:56:26 +0000 Subject: [PATCH 16/31] test: round up ram computation --- tests/integration/test_pipeline.py | 2 ++ tests/unittests/test_image_generation.py | 1 + tests/unittests/test_prompt_generation.py | 1 + 3 files changed, 4 insertions(+) diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index f54244e..7df8676 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -9,6 +9,8 @@ # Get the total memory in GB total_memory = psutil.virtual_memory().total / (1000**3) +print(total_memory) +total_memory = int(total_memory) + (total_memory > int(total_memory)) # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1000**3) diff --git a/tests/unittests/test_image_generation.py b/tests/unittests/test_image_generation.py index 4a73d0b..efbeb74 100644 --- a/tests/unittests/test_image_generation.py +++ b/tests/unittests/test_image_generation.py @@ -17,6 +17,7 @@ # Get the total memory in GB total_memory = psutil.virtual_memory().total / (1000**3) +total_memory = int(total_memory) + (total_memory > int(total_memory)) # 
Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1000**3) diff --git a/tests/unittests/test_prompt_generation.py b/tests/unittests/test_prompt_generation.py index 4b906ac..7769a71 100644 --- a/tests/unittests/test_prompt_generation.py +++ b/tests/unittests/test_prompt_generation.py @@ -16,6 +16,7 @@ # Get the total memory in GB total_memory = psutil.virtual_memory().total / (1000**3) +total_memory = int(total_memory) + (total_memory > int(total_memory)) # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1000**3) From 6a78e00e6f487a69fd8f9af180839a589db1b70a Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 17:22:11 +0000 Subject: [PATCH 17/31] test: disable output capturing --- .github/workflows/tests.yaml | 4 ++-- tests/integration/test_pipeline.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 31a711d..a5e3a76 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -49,11 +49,11 @@ jobs: pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - name: Run tests with coverage [Ubuntu] if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' - run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + run: pytest -s tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] if: matrix.os != 'buildjet-4vcpu-ubuntu-2204' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml + run: pytest -s tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 7df8676..fb6d3f5 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -9,10 +9,12 @@ # Get the total memory in GB total_memory 
= psutil.virtual_memory().total / (1000**3) -print(total_memory) +print(f"Total memory: {total_memory}") total_memory = int(total_memory) + (total_memory > int(total_memory)) +print(f"Total memory rounded: {total_memory}") # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1000**3) +print(f"Total disk space: {total_disk_space}") def _check_detection_pipeline(cmd: str, target_folder: str): From 3de930682f0c09834a6dfe0a8c3afd552951a510 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Wed, 25 Sep 2024 17:54:28 +0000 Subject: [PATCH 18/31] test: decrease required ram for demanding tests --- .github/workflows/tests.yaml | 4 +- tests/integration/test_pipeline.py | 123 +++++++++++----------- tests/unittests/test_image_generation.py | 24 ++--- tests/unittests/test_prompt_generation.py | 8 +- 4 files changed, 78 insertions(+), 81 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a5e3a76..31a711d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -49,11 +49,11 @@ jobs: pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - name: Run tests with coverage [Ubuntu] if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' - run: pytest -s tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] if: matrix.os != 'buildjet-4vcpu-ubuntu-2204' || matrix.version != '3.10' - run: pytest -s tests --junit-xml pytest.xml + run: pytest tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index fb6d3f5..7cfb88a 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -9,12 +9,9 @@ # Get the total memory in GB total_memory = 
psutil.virtual_memory().total / (1000**3) -print(f"Total memory: {total_memory}") total_memory = int(total_memory) + (total_memory > int(total_memory)) -print(f"Total memory rounded: {total_memory}") # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1000**3) -print(f"Total disk space: {total_disk_space}") def _check_detection_pipeline(cmd: str, target_folder: str): @@ -260,8 +257,8 @@ def test_negative_num_objects_range(): # DETECTION - SIMPLE LM # ========================================================= @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_detection_pipeline(): # Define target folder @@ -282,8 +279,8 @@ def test_cpu_simple_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_detection_pipeline(): # Define target folder @@ -304,8 +301,8 @@ def test_cuda_simple_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, - reason="Test requires GPU, at least 16GB of RAM and 55GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 55, + reason="Test requires GPU, at least 15GB of RAM and 55GB of HDD", ) def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): # Define target folder @@ -327,8 +324,8 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - 
reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): # Define target folder @@ -350,8 +347,8 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_detection_pipeline(): # Define target folder @@ -372,8 +369,8 @@ def test_cpu_simple_sdxl_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_detection_pipeline(): # Define target folder @@ -394,8 +391,8 @@ def test_cuda_simple_sdxl_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_lightning_detection_pipeline(): # Define target folder @@ -416,8 +413,8 @@ def test_cpu_simple_sdxl_lightning_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def 
test_cuda_simple_sdxl_lightning_detection_pipeline(): # Define target folder @@ -463,8 +460,8 @@ def test_cpu_lm_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_turbo_detection_pipeline(): # Define target folder @@ -530,8 +527,8 @@ def test_cpu_lm_sdxl_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_detection_pipeline(): # Define target folder @@ -578,8 +575,8 @@ def test_cuda_4bit_lm_sdxl_detection_pipeline(): # DETECTION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_turbo_detection_pipeline(): # Define target folder @@ -600,8 +597,8 @@ def test_cpu_tiny_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_turbo_detection_pipeline(): # Define target folder @@ -622,8 +619,8 @@ def 
test_cuda_tiny_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_detection_pipeline(): # Define target folder @@ -644,8 +641,8 @@ def test_cpu_tiny_sdxl_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_detection_pipeline(): # Define target folder @@ -669,8 +666,8 @@ def test_cuda_tiny_sdxl_detection_pipeline(): # CLASSIFICATION - SIMPLE LM # ========================================================= @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_classification_pipeline(): # Define target folder @@ -693,8 +690,8 @@ def test_cpu_simple_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_classification_pipeline(): # Define target folder @@ -717,8 +714,8 @@ def test_cuda_simple_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, - reason="Test requires GPU, at least 16GB of RAM and 55GB 
of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 55, + reason="Test requires GPU, at least 15GB of RAM and 55GB of HDD", ) def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): # Define target folder @@ -742,8 +739,8 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): # Define target folder @@ -767,8 +764,8 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_classification_pipeline(): # Define target folder @@ -791,8 +788,8 @@ def test_cpu_simple_sdxl_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_classification_pipeline(): # Define target folder @@ -842,8 +839,8 @@ def test_cpu_lm_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 16GB of RAM, 55GB of HDD and CUDA support", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 
15GB of RAM, 55GB of HDD and CUDA support", ) def test_cuda_lm_sdxl_turbo_classification_pipeline(): # Define target folder @@ -915,8 +912,8 @@ def test_cpu_lm_sdxl_classification_pipeline(): @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_classification_pipeline(): # Define target folder @@ -967,8 +964,8 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): # CLASSIFICATION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_turbo_classification_pipeline(): # Define target folder @@ -991,8 +988,8 @@ def test_cpu_tiny_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_turbo_classification_pipeline(): # Define target folder @@ -1015,8 +1012,8 @@ def test_cuda_tiny_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_classification_pipeline(): # Define target folder @@ -1039,8 +1036,8 @@ def 
test_cpu_tiny_sdxl_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_classification_pipeline(): # Define target folder @@ -1066,8 +1063,8 @@ def test_cuda_tiny_sdxl_classification_pipeline(): # TEST WITH CONFIG FILE # ========================================================= @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): # Define target folder @@ -1084,8 +1081,8 @@ def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): # Define target folder @@ -1102,8 +1099,8 @@ def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", + total_memory < 15 or total_disk_space < 35, + reason="Test requires at least 15GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): # Define target folder @@ -1122,8 +1119,8 @@ def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, 
- reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, + reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): # Define target folder diff --git a/tests/unittests/test_image_generation.py b/tests/unittests/test_image_generation.py index efbeb74..0cb9105 100644 --- a/tests/unittests/test_image_generation.py +++ b/tests/unittests/test_image_generation.py @@ -99,48 +99,48 @@ def _check_image_generator( @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, + reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", ) def test_cuda_sdxl_image_generator(): _check_image_generator(StableDiffusionImageGenerator, "cuda") @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", + total_memory < 15 or total_disk_space < 25, + reason="Test requires at least 15GB of RAM and 25GB of HDD", ) def test_cpu_sdxl_image_generator(): _check_image_generator(StableDiffusionImageGenerator, "cpu") @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, + reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", ) def test_cuda_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cuda") @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", + total_memory < 15 or total_disk_space < 25, + reason="Test requires at least 15GB of RAM and 25GB of HDD", ) 
def test_cpu_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cpu") @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", + not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, + reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", ) def test_cuda_sdxl_lightning_image_generator(): _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") @pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", + total_memory < 15 or total_disk_space < 25, + reason="Test requires at least 15GB of RAM and 25GB of HDD", ) def test_cpu_sdxl_lightning_image_generator(): _check_image_generator(StableDiffusionLightningImageGenerator, "cpu") diff --git a/tests/unittests/test_prompt_generation.py b/tests/unittests/test_prompt_generation.py index 7769a71..0540611 100644 --- a/tests/unittests/test_prompt_generation.py +++ b/tests/unittests/test_prompt_generation.py @@ -70,8 +70,8 @@ def _check_lm_prompt_generator( @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 15GB of RAM, 35GB of HDD and CUDA support", ) def test_cuda_lm_prompt_generator(): _check_lm_prompt_generator("cuda") @@ -128,8 +128,8 @@ def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGener @pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", + total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 15GB of RAM, 35GB of HDD 
and CUDA support", ) def test_cuda_synonym_generator(): _check_synonym_generator("cuda") From a839a4afd76017f6e4a4d336ab3d0955f7f5ba52 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin <49622375+sokovninn@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:41:15 +0200 Subject: [PATCH 19/31] test: 8vpcu buildjet runner --- .github/workflows/tests.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 31a711d..9d6d82d 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -13,8 +13,8 @@ jobs: strategy: fail-fast: false matrix: - os: [buildjet-4vcpu-ubuntu-2204, windows-latest, macOS-latest] - version: ['3.10', '3.11'] + os: [buildjet-8vcpu-ubuntu-2204, windows-latest, macOS-latest] + version: ['3.10'] runs-on: ${{ matrix.os }} @@ -113,4 +113,4 @@ jobs: - name: Publish Test Results uses: EnricoMi/publish-unit-test-result-action@v2 with: - files: "artifacts/**/*.xml" \ No newline at end of file + files: "artifacts/**/*.xml" From 6e43d3abcb31e75c4ad1e6b0e1011b039bb1351a Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin <49622375+sokovninn@users.noreply.github.com> Date: Wed, 25 Sep 2024 23:49:40 +0200 Subject: [PATCH 20/31] test: fix buildjet 8cpu runner --- .github/workflows/tests.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9d6d82d..2b57aec 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -31,7 +31,7 @@ jobs: cache: pip - name: Install dependencies [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' + if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' run: | sudo apt update sudo apt install -y pandoc @@ -48,26 +48,26 @@ jobs: pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' + if: 
matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] - if: matrix.os != 'buildjet-4vcpu-ubuntu-2204' || matrix.version != '3.10' + if: matrix.os != 'buildjet-8vpcu-ubuntu-2204' || matrix.version != '3.10' run: pytest tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' run: coverage-badge -o media/coverage_badge.svg -f - name: Generate coverage report [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' uses: orgoro/coverage@v3.1 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' run: | git config --global user.name 'GitHub Actions' git config --global user.email 'actions@github.com' @@ -76,7 +76,7 @@ jobs: git commit -m "[Automated] Updated coverage badge" } - name: Push changes [Ubuntu] - if: matrix.os == 'buildjet-4vcpu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' uses: ad-m/github-push-action@master with: branch: ${{ github.head_ref }} From 005e3b8ba9aaacecd64d2391a031e2bb96efa3eb Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin <49622375+sokovninn@users.noreply.github.com> Date: Thu, 26 Sep 2024 00:13:18 +0200 Subject: [PATCH 21/31] test: fix 8vcpu buildjet --- .github/workflows/tests.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2b57aec..bf56969 100644 --- 
a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -31,7 +31,7 @@ jobs: cache: pip - name: Install dependencies [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' run: | sudo apt update sudo apt install -y pandoc @@ -48,26 +48,26 @@ jobs: pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] - if: matrix.os != 'buildjet-8vpcu-ubuntu-2204' || matrix.version != '3.10' + if: matrix.os != 'buildjet-8vcpu-ubuntu-2204' || matrix.version != '3.10' run: pytest tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' run: coverage-badge -o media/coverage_badge.svg -f - name: Generate coverage report [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' uses: orgoro/coverage@v3.1 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' run: | git config --global user.name 'GitHub Actions' git config --global user.email 'actions@github.com' @@ -76,7 +76,7 @@ jobs: git commit -m "[Automated] Updated coverage badge" } - name: Push changes [Ubuntu] - if: matrix.os == 'buildjet-8vpcu-ubuntu-2204' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' uses: 
ad-m/github-push-action@master with: branch: ${{ github.head_ref }} From 9f3a538688e900ff3ca894454c6293643cef6830 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 00:28:30 +0000 Subject: [PATCH 22/31] test: divide tests into core and heavy --- .../workflows/{tests.yaml => core-tests.yaml} | 19 +- .github/workflows/unit-tests.yaml | 116 +++++ .../integration/sample_config.yaml | 0 tests/core_tests/integration/test_pipeline.py | 122 ++++++ .../unittests/test_annotators.py | 26 +- .../unittests/test_converters.py | 0 .../unittests/test_image_generation.py | 50 +-- .../unittests/test_pipeline_arguments.py | 213 ++++++++++ .../unittests/test_prompt_generation.py | 52 +-- .../{ => core_tests}/unittests/test_utils.py | 0 .../integration/test_pipeline.py | 398 +++--------------- .../unittests/test_image_generation.py | 69 +++ .../unittests/test_prompt_generation.py | 95 +++++ 13 files changed, 716 insertions(+), 444 deletions(-) rename .github/workflows/{tests.yaml => core-tests.yaml} (88%) create mode 100644 .github/workflows/unit-tests.yaml rename tests/{ => core_tests}/integration/sample_config.yaml (100%) create mode 100644 tests/core_tests/integration/test_pipeline.py rename tests/{ => core_tests}/unittests/test_annotators.py (80%) rename tests/{ => core_tests}/unittests/test_converters.py (100%) rename tests/{ => core_tests}/unittests/test_image_generation.py (71%) create mode 100644 tests/core_tests/unittests/test_pipeline_arguments.py rename tests/{ => core_tests}/unittests/test_prompt_generation.py (78%) rename tests/{ => core_tests}/unittests/test_utils.py (100%) rename tests/{ => heavy_tests}/integration/test_pipeline.py (69%) create mode 100644 tests/heavy_tests/unittests/test_image_generation.py create mode 100644 tests/heavy_tests/unittests/test_prompt_generation.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/core-tests.yaml similarity index 88% rename from .github/workflows/tests.yaml rename to 
.github/workflows/core-tests.yaml index bf56969..41c96a9 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/core-tests.yaml @@ -1,12 +1,13 @@ -name: Tests +name: Core tests on: pull_request: - branches: [ dev, main ] + branches: [ main ] paths: - 'datadreamer/**/**.py' - - 'tests/**/**.py' - - .github/workflows/tests.yaml + - 'tests/core_tests/**/**.py' + - .github/workflows/core-tests.yaml + workflow_dispatch: jobs: run_tests: @@ -14,7 +15,7 @@ jobs: fail-fast: false matrix: os: [buildjet-8vcpu-ubuntu-2204, windows-latest, macOS-latest] - version: ['3.10'] + version: ['3.10', '3.11'] runs-on: ${{ matrix.os }} @@ -49,11 +50,11 @@ jobs: pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - name: Run tests with coverage [Ubuntu] if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' - run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + run: pytest tests/core_tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] - if: matrix.os != 'buildjet-8vcpu-ubuntu-2204' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml + if: matrix.os != 'buildjet-8vcpu-ubuntu-2204' + run: pytest tests/core_tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' @@ -93,7 +94,7 @@ jobs: publish-test-results: name: "Publish Tests Results" needs: run_tests - runs-on: ubuntu-latest + runs-on: buildjet-8vcpu-ubuntu-2204 permissions: checks: write pull-requests: write diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml new file mode 100644 index 0000000..59de92a --- /dev/null +++ b/.github/workflows/unit-tests.yaml @@ -0,0 +1,116 @@ +name: Unit tests + +on: + pull_request: + branches: [ dev ] + paths: + - 'datadreamer/**/**.py' + - 'tests/core_tests/unittests/**.py' + - .github/workflows/unit-tests.yaml + +jobs: + run_tests: + strategy: + fail-fast: false + 
matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + version: ['3.10', '3.11'] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.version }} + cache: pip + + - name: Install dependencies [Ubuntu] + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y pandoc + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + - name: Install dependencies [Windows] + if: matrix.os == 'windows-latest' + run: | + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + - name: Install dependencies [macOS] + if: matrix.os == 'macOS-latest' + run: | + pip install -e .[dev] + pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 + - name: Run tests with coverage [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: pytest tests/core_tests/unittests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + + - name: Run tests [Windows, macOS] + if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' + run: pytest tests/core_tests/unittests --junit-xml pytest.xml + + - name: Generate coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: coverage-badge -o media/coverage_badge.svg -f + + - name: Generate coverage report [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Commit coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + - name: Push changes 
[Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: ad-m/github-push-action@master + with: + branch: ${{ github.head_ref }} + + - name: Upload Test Results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) + path: pytest.xml + retention-days: 10 + if-no-files-found: error + + publish-test-results: + name: "Publish Tests Results" + needs: run_tests + runs-on: ubuntu-latest + permissions: + checks: write + pull-requests: write + if: always() + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + with: + files: "artifacts/**/*.xml" diff --git a/tests/integration/sample_config.yaml b/tests/core_tests/integration/sample_config.yaml similarity index 100% rename from tests/integration/sample_config.yaml rename to tests/core_tests/integration/sample_config.yaml diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py new file mode 100644 index 0000000..190c0ce --- /dev/null +++ b/tests/core_tests/integration/test_pipeline.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import os +import subprocess + +import psutil +import pytest +import torch + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_detection_pipeline(cmd: str, target_folder: str): + # Run the command + result = subprocess.run(cmd, shell=True) + assert result.returncode == 0, "Command failed to run" + # Check that the target folder is a folder + assert os.path.isdir(target_folder), "Directory not created" + files = [ + "annotations.json", + "generation_args.yaml", + "prompts.json", 
+ ] + # Check that all the files were created + for file in files: + assert os.path.isfile(os.path.join(target_folder, file)), f"{file} not created" + # Check that at least one generated image (image_*.jpg) was created + assert ( + len( + list( + filter( + lambda x: "image_" in x and ".jpg" in x, os.listdir(target_folder) + ) + ) + ) + > 0 + ), "Images not created" + # Check that the "bboxes_visualization" folder was created + assert os.path.isdir( + os.path.join(target_folder, "bboxes_visualization") + ), "bboxes_visualization directory not created" + +# ========================================================= +# TEST WITH CONFIG FILE +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def 
test_cpu_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/unittests/test_annotators.py b/tests/core_tests/unittests/test_annotators.py similarity index 80% rename from tests/unittests/test_annotators.py rename to tests/core_tests/unittests/test_annotators.py index f69f83e..794b898 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/core_tests/unittests/test_annotators.py @@ -11,7 +11,7 @@ from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1000**3) +total_disk_space = psutil.disk_usage("/").total / (1024**3) def _check_owlv2_annotator(device: str, size: str = "base"): @@ -40,16 +40,16 @@ def _check_owlv2_annotator(device: str, size: str = "base"): @pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", + 
not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", ) def test_cuda_owlv2_annotator(): _check_owlv2_annotator("cuda") @pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", ) def test_cpu_owlv2_annotator(): _check_owlv2_annotator("cpu") @@ -67,32 +67,32 @@ def _check_clip_annotator(device: str, size: str = "base"): @pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", ) def test_cuda_clip_base_annotator(): _check_clip_annotator("cuda") @pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", ) def test_cpu_clip_base_annotator(): _check_clip_annotator("cpu") @pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", ) def test_cuda_clip_large_annotator(): _check_clip_annotator("cuda") @pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", ) def test_cpu_clip_large_annotator(): _check_clip_annotator("cpu") diff --git a/tests/unittests/test_converters.py b/tests/core_tests/unittests/test_converters.py similarity index 100% rename from tests/unittests/test_converters.py rename to tests/core_tests/unittests/test_converters.py diff --git a/tests/unittests/test_image_generation.py b/tests/core_tests/unittests/test_image_generation.py similarity index 71% rename from tests/unittests/test_image_generation.py rename to 
tests/core_tests/unittests/test_image_generation.py index 0cb9105..51fda42 100644 --- a/tests/unittests/test_image_generation.py +++ b/tests/core_tests/unittests/test_image_generation.py @@ -8,18 +8,18 @@ import torch from PIL import Image +from datadreamer.image_generation.clip_image_tester import ClipImageTester from datadreamer.image_generation import ( StableDiffusionImageGenerator, StableDiffusionLightningImageGenerator, StableDiffusionTurboImageGenerator, ) -from datadreamer.image_generation.clip_image_tester import ClipImageTester # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1000**3) -total_memory = int(total_memory) + (total_memory > int(total_memory)) +total_memory = psutil.virtual_memory().total / (1024**3) # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1000**3) +total_disk_space = psutil.disk_usage("/").total / (1024**3) + def _check_clip_image_tester(device: str): @@ -50,21 +50,20 @@ def _check_clip_image_tester(device: str): @pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", ) def test_cuda_clip_image_tester(): _check_clip_image_tester("cuda") @pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", ) def test_cpu_clip_image_tester(): _check_clip_image_tester("cpu") - def _check_image_generator( image_generator_class: Type[ Union[ @@ -97,50 +96,33 @@ def _check_image_generator( # Release the generator image_generator.release(empty_cuda_cache=True if device != "cpu" else False) - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, - reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", -) -def test_cuda_sdxl_image_generator(): - 
_check_image_generator(StableDiffusionImageGenerator, "cuda") - - -@pytest.mark.skipif( - total_memory < 15 or total_disk_space < 25, - reason="Test requires at least 15GB of RAM and 25GB of HDD", -) -def test_cpu_sdxl_image_generator(): - _check_image_generator(StableDiffusionImageGenerator, "cpu") - - @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, - reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", ) def test_cuda_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cuda") @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 25, - reason="Test requires at least 15GB of RAM and 25GB of HDD", + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", ) def test_cpu_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cpu") @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 25, - reason="Test requires GPU, at least 15GB of RAM and 25GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", ) def test_cuda_sdxl_lightning_image_generator(): _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 25, - reason="Test requires at least 15GB of RAM and 25GB of HDD", + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", ) def test_cpu_sdxl_lightning_image_generator(): _check_image_generator(StableDiffusionLightningImageGenerator, "cpu") diff --git a/tests/core_tests/unittests/test_pipeline_arguments.py b/tests/core_tests/unittests/test_pipeline_arguments.py 
new file mode 100644 index 0000000..198db72 --- /dev/null +++ b/tests/core_tests/unittests/test_pipeline_arguments.py @@ -0,0 +1,213 @@ +from __future__ import annotations + +import subprocess + +import pytest + + +def _check_wrong_argument_choice(cmd: str): + with pytest.raises(subprocess.CalledProcessError): + subprocess.check_call(cmd, shell=True) + + +def _check_wrong_value(cmd: str): + with pytest.raises(ValueError): + try: + subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + raise ValueError(e.output.decode()) from e + + +# ========================================================= +# ARGUMENTS CHECKS +# ========================================================= +def test_invalid_task_value(): + # Define the cmd + cmd = "datadreamer --task invalid_task" + _check_wrong_argument_choice(cmd) + + +def test_invalid_prompts_number_type(): + # Define the cmd + cmd = "datadreamer --prompts_number value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_num_objects_range_type(): + # Define the cmd + cmd = "datadreamer --num_objects_range value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_conf_threshold_range_type(): + # Define the cmd + cmd = "datadreamer --conf_threshold value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_tester_patience_type(): + # Define the cmd + cmd = "datadreamer --image_tester_patience value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_seed_type(): + # Define the cmd + cmd = "datadreamer --seed value --device cpu" + _check_wrong_argument_choice(cmd) + + +def test_invalid_prompt_generator(): + # Define the cmd + cmd = "datadreamer --prompt_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_generator(): + # Define the cmd + cmd = "datadreamer --image_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_annotator(): + # Define the cmd + cmd = "datadreamer 
--image_annotator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_det_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator clip" + _check_wrong_argument_choice(cmd) + + +def test_invalid_clf_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator owlv2 --task classification" + _check_wrong_argument_choice(cmd) + + +def test_invalid_device(): + # Define the cmd + cmd = "datadreamer --device invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_annotator_size(): + # Define the cmd + cmd = "datadreamer --annotator_size invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_empty_class_names(): + # Define the cmd + cmd = "datadreamer --class_names []" + _check_wrong_value(cmd) + + +def test_invalid_class_names(): + # Define the cmd + cmd = "datadreamer --class_names [2, -1]" + _check_wrong_value(cmd) + + +def test_invalid_prompts_number(): + # Define the cmd + cmd = "datadreamer --prompts_number -1" + _check_wrong_value(cmd) + + +def test_negative_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold -1" + _check_wrong_value(cmd) + + +def test_big_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold 10" + _check_wrong_value(cmd) + + +def test_negative_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold -1" + _check_wrong_value(cmd) + + +def test_big_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold 10" + _check_wrong_value(cmd) + + +def test_invalid_image_tester_patience(): + # Define the cmd + cmd = "datadreamer --image_tester_patience -1" + _check_wrong_value(cmd) + + +def test_invalid_seed(): + # Define the cmd + cmd = "datadreamer --seed -1 --device cpu" + _check_wrong_value(cmd) + + +def test_invalid_synonym_generator(): + # Define the cmd + cmd = "datadreamer --device cpu --synonym_generator invalid" + _check_wrong_value(cmd) + + +def 
test_invalid_lm_quantization(): + # Define the cmd + cmd = "datadreamer --device cude --lm_quantization invalid" + _check_wrong_value(cmd) + + +def test_invalid_device_lm_quantization(): + # Define the cmd + cmd = "datadreamer --device cpu --lm_quantization 4bit" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_prompt(): + # Define the cmd + cmd = "datadreamer --batch_size_prompt -1" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_annotation(): + # Define the cmd + cmd = "datadreamer --batch_size_annotation -1" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_image(): + # Define the cmd + cmd = "datadreamer --batch_size_image -1" + _check_wrong_value(cmd) + + +def test_invalid_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1" + _check_wrong_value(cmd) + + +def test_many_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1 2 3" + _check_wrong_value(cmd) + + +def test_desc_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 3 1" + _check_wrong_value(cmd) + + +def test_negative_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range -3 1" + _check_wrong_value(cmd) \ No newline at end of file diff --git a/tests/unittests/test_prompt_generation.py b/tests/core_tests/unittests/test_prompt_generation.py similarity index 78% rename from tests/unittests/test_prompt_generation.py rename to tests/core_tests/unittests/test_prompt_generation.py index 0540611..fca6fc0 100644 --- a/tests/unittests/test_prompt_generation.py +++ b/tests/core_tests/unittests/test_prompt_generation.py @@ -15,10 +15,9 @@ ) # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1000**3) -total_memory = int(total_memory) + (total_memory > int(total_memory)) +total_memory = psutil.virtual_memory().total / (1024**3) # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1000**3) +total_disk_space = 
psutil.disk_usage("/").total / (1024**3) def test_simple_prompt_generator(): @@ -40,7 +39,6 @@ def test_simple_prompt_generator(): # Check the generated text assert prompt_text == f"A photo of a {', a '.join(selected_objects)}" - def _check_lm_prompt_generator( device: str, prompt_generator_class=LMPromptGenerator, quantization: str = "none" ): @@ -68,15 +66,6 @@ def _check_lm_prompt_generator( assert len(prompt_text) > 0 and prompt_text.lower().startswith("a photo of") prompt_generator.release(empty_cuda_cache=True if device != "cpu" else False) - -@pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 15GB of RAM, 35GB of HDD and CUDA support", -) -def test_cuda_lm_prompt_generator(): - _check_lm_prompt_generator("cuda") - - @pytest.mark.skipif( total_memory < 12 or not torch.cuda.is_available() or total_disk_space < 25, reason="Test requires at least 12GB of RAM, 25GB of HDD and CUDA support", @@ -84,14 +73,12 @@ def test_cuda_lm_prompt_generator(): def test_cuda_4bit_lm_prompt_generator(): _check_lm_prompt_generator("cuda", quantization="4bit") - @pytest.mark.skipif( - total_memory < 32 or total_disk_space < 35, - reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", + total_memory < 12 or total_disk_space < 12, + reason="Test requires at least 12GB of RAM and 12GB of HDD for running on CPU", ) -def test_cpu_lm_prompt_generator(): - _check_lm_prompt_generator("cpu") - +def test_cpu_tinyllama_lm_prompt_generator(): + _check_lm_prompt_generator("cpu", TinyLlamaLMPromptGenerator) @pytest.mark.skipif( total_memory < 8 or not torch.cuda.is_available() or total_disk_space < 12, @@ -101,14 +88,6 @@ def test_cuda_tinyllama_lm_prompt_generator(): _check_lm_prompt_generator("cuda", TinyLlamaLMPromptGenerator) -@pytest.mark.skipif( - total_memory < 12 or total_disk_space < 12, - reason="Test requires at least 12GB of RAM and 12GB of HDD for running on CPU", -) -def 
test_cpu_tinyllama_lm_prompt_generator(): - _check_lm_prompt_generator("cpu", TinyLlamaLMPromptGenerator) - - def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGenerator): synonyms_num = 3 generator = synonym_generator_class(synonyms_number=synonyms_num, device=device) @@ -127,28 +106,11 @@ def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGener generator.release(empty_cuda_cache=True if device != "cpu" else False) -@pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 15GB of RAM, 35GB of HDD and CUDA support", -) -def test_cuda_synonym_generator(): - _check_synonym_generator("cuda") - - -@pytest.mark.skipif( - total_memory < 32 or total_disk_space < 35, - reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", -) -def test_cpu_synonym_generator(): - _check_synonym_generator("cpu") - - def test_cpu_wordnet_synonym_generator(): _check_synonym_generator("cpu", WordNetSynonymGenerator) - @pytest.mark.skipif( - torch.cuda.is_available(), + not torch.cuda.is_available(), reason="Test requires CUDA support", ) def test_cuda_wordnet_synonym_generator(): diff --git a/tests/unittests/test_utils.py b/tests/core_tests/unittests/test_utils.py similarity index 100% rename from tests/unittests/test_utils.py rename to tests/core_tests/unittests/test_utils.py diff --git a/tests/integration/test_pipeline.py b/tests/heavy_tests/integration/test_pipeline.py similarity index 69% rename from tests/integration/test_pipeline.py rename to tests/heavy_tests/integration/test_pipeline.py index 7cfb88a..d368dac 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/heavy_tests/integration/test_pipeline.py @@ -8,10 +8,9 @@ import torch # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1000**3) -total_memory = int(total_memory) + (total_memory > int(total_memory)) +total_memory = psutil.virtual_memory().total / 
(1024**3) # Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1000**3) +total_disk_space = psutil.disk_usage("/").total / (1024**3) def _check_detection_pipeline(cmd: str, target_folder: str): @@ -45,220 +44,12 @@ def _check_detection_pipeline(cmd: str, target_folder: str): ), "bboxes_visualization directory not created" -def _check_wrong_argument_choice(cmd: str): - with pytest.raises(subprocess.CalledProcessError): - subprocess.check_call(cmd, shell=True) - - -def _check_wrong_value(cmd: str): - with pytest.raises(ValueError): - try: - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) - except subprocess.CalledProcessError as e: - raise ValueError(e.output.decode()) from e - - -# ========================================================= -# ARGUMENTS CHECKS -# ========================================================= -def test_invalid_task_value(): - # Define the cmd - cmd = "datadreamer --task invalid_task" - _check_wrong_argument_choice(cmd) - - -def test_invalid_prompts_number_type(): - # Define the cmd - cmd = "datadreamer --prompts_number value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_num_objects_range_type(): - # Define the cmd - cmd = "datadreamer --num_objects_range value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_conf_threshold_range_type(): - # Define the cmd - cmd = "datadreamer --conf_threshold value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_tester_patience_type(): - # Define the cmd - cmd = "datadreamer --image_tester_patience value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_seed_type(): - # Define the cmd - cmd = "datadreamer --seed value --device cpu" - _check_wrong_argument_choice(cmd) - - -def test_invalid_prompt_generator(): - # Define the cmd - cmd = "datadreamer --prompt_generator invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_generator(): - # Define the cmd - cmd = "datadreamer 
--image_generator invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_det_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator clip" - _check_wrong_argument_choice(cmd) - - -def test_invalid_clf_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator owlv2 --task classification" - _check_wrong_argument_choice(cmd) - - -def test_invalid_device(): - # Define the cmd - cmd = "datadreamer --device invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_annotator_size(): - # Define the cmd - cmd = "datadreamer --annotator_size invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_empty_class_names(): - # Define the cmd - cmd = "datadreamer --class_names []" - _check_wrong_value(cmd) - - -def test_invalid_class_names(): - # Define the cmd - cmd = "datadreamer --class_names [2, -1]" - _check_wrong_value(cmd) - - -def test_invalid_prompts_number(): - # Define the cmd - cmd = "datadreamer --prompts_number -1" - _check_wrong_value(cmd) - - -def test_negative_conf_threshold(): - # Define the cmd - cmd = "datadreamer --conf_threshold -1" - _check_wrong_value(cmd) - - -def test_big_conf_threshold(): - # Define the cmd - cmd = "datadreamer --conf_threshold 10" - _check_wrong_value(cmd) - - -def test_negative_annotation_iou_threshold(): - # Define the cmd - cmd = "datadreamer --annotation_iou_threshold -1" - _check_wrong_value(cmd) - - -def test_big_annotation_iou_threshold(): - # Define the cmd - cmd = "datadreamer --annotation_iou_threshold 10" - _check_wrong_value(cmd) - - -def test_invalid_image_tester_patience(): - # Define the cmd - cmd = "datadreamer --image_tester_patience -1" - _check_wrong_value(cmd) - - -def test_invalid_seed(): - # Define the cmd - cmd = "datadreamer --seed -1 --device cpu" - _check_wrong_value(cmd) - - -def 
test_invalid_synonym_generator(): - # Define the cmd - cmd = "datadreamer --device cpu --synonym_generator invalid" - _check_wrong_value(cmd) - - -def test_invalid_lm_quantization(): - # Define the cmd - cmd = "datadreamer --device cude --lm_quantization invalid" - _check_wrong_value(cmd) - - -def test_invalid_device_lm_quantization(): - # Define the cmd - cmd = "datadreamer --device cpu --lm_quantization 4bit" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_prompt(): - # Define the cmd - cmd = "datadreamer --batch_size_prompt -1" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_annotation(): - # Define the cmd - cmd = "datadreamer --batch_size_annotation -1" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_image(): - # Define the cmd - cmd = "datadreamer --batch_size_image -1" - _check_wrong_value(cmd) - - -def test_invalid_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 1" - _check_wrong_value(cmd) - - -def test_many_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 1 2 3" - _check_wrong_value(cmd) - - -def test_desc_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 3 1" - _check_wrong_value(cmd) - - -def test_negative_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range -3 1" - _check_wrong_value(cmd) - - # ========================================================= # DETECTION - SIMPLE LM # ========================================================= @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_detection_pipeline(): # Define target folder @@ -279,8 +70,8 @@ def test_cpu_simple_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or 
total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_detection_pipeline(): # Define target folder @@ -301,8 +92,8 @@ def test_cuda_simple_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 55, - reason="Test requires GPU, at least 15GB of RAM and 55GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, + reason="Test requires GPU, at least 16GB of RAM and 55GB of HDD", ) def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): # Define target folder @@ -324,8 +115,8 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): # Define target folder @@ -347,8 +138,8 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_detection_pipeline(): # Define target folder @@ -369,8 +160,8 @@ def test_cpu_simple_sdxl_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or 
total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_detection_pipeline(): # Define target folder @@ -391,8 +182,8 @@ def test_cuda_simple_sdxl_detection_pipeline(): @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_lightning_detection_pipeline(): # Define target folder @@ -413,8 +204,8 @@ def test_cpu_simple_sdxl_lightning_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_lightning_detection_pipeline(): # Define target folder @@ -460,8 +251,8 @@ def test_cpu_lm_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_turbo_detection_pipeline(): # Define target folder @@ -527,8 +318,8 @@ def test_cpu_lm_sdxl_detection_pipeline(): @pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_detection_pipeline(): # Define target folder @@ -575,8 +366,8 @@ def 
test_cuda_4bit_lm_sdxl_detection_pipeline(): # DETECTION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_turbo_detection_pipeline(): # Define target folder @@ -597,8 +388,8 @@ def test_cpu_tiny_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_turbo_detection_pipeline(): # Define target folder @@ -619,8 +410,8 @@ def test_cuda_tiny_sdxl_turbo_detection_pipeline(): @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_detection_pipeline(): # Define target folder @@ -641,8 +432,8 @@ def test_cpu_tiny_sdxl_detection_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_detection_pipeline(): # Define target folder @@ -666,8 +457,8 @@ def test_cuda_tiny_sdxl_detection_pipeline(): # CLASSIFICATION - SIMPLE LM # ========================================================= @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 
15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_turbo_classification_pipeline(): # Define target folder @@ -690,8 +481,8 @@ def test_cpu_simple_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_turbo_classification_pipeline(): # Define target folder @@ -714,8 +505,8 @@ def test_cuda_simple_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 55, - reason="Test requires GPU, at least 15GB of RAM and 55GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, + reason="Test requires GPU, at least 16GB of RAM and 55GB of HDD", ) def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): # Define target folder @@ -739,8 +530,8 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): # Define target folder @@ -764,8 +555,8 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 
16GB of RAM and 35GB of HDD", ) def test_cpu_simple_sdxl_classification_pipeline(): # Define target folder @@ -788,8 +579,8 @@ def test_cpu_simple_sdxl_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_simple_sdxl_classification_pipeline(): # Define target folder @@ -839,8 +630,8 @@ def test_cpu_lm_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 15GB of RAM, 55GB of HDD and CUDA support", + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, 55GB of HDD and CUDA support", ) def test_cuda_lm_sdxl_turbo_classification_pipeline(): # Define target folder @@ -912,8 +703,8 @@ def test_cpu_lm_sdxl_classification_pipeline(): @pytest.mark.skipif( - total_memory < 15 or not torch.cuda.is_available() or total_disk_space < 55, - reason="Test requires at least 15GB of RAM, CUDA support and 55GB of HDD", + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", ) def test_cuda_lm_sdxl_classification_pipeline(): # Define target folder @@ -964,8 +755,8 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): # CLASSIFICATION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_turbo_classification_pipeline(): # 
Define target folder @@ -988,8 +779,8 @@ def test_cpu_tiny_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_turbo_classification_pipeline(): # Define target folder @@ -1012,8 +803,8 @@ def test_cuda_tiny_sdxl_turbo_classification_pipeline(): @pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", ) def test_cpu_tiny_sdxl_classification_pipeline(): # Define target folder @@ -1036,8 +827,8 @@ def test_cpu_tiny_sdxl_classification_pipeline(): @pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) def test_cuda_tiny_sdxl_classification_pipeline(): # Define target folder @@ -1056,83 +847,4 @@ def test_cuda_tiny_sdxl_classification_pipeline(): f"--device cuda" ) # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -# ========================================================= -# TEST WITH CONFIG FILE -# ========================================================= -@pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", -) -def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cpu-simple-sdxl-turbo-config/" - # Define the command to run the 
datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " - f"--device cpu" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", -) -def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cuda-simple-sdxl-turbo-config/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " - f"--device cuda" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -@pytest.mark.skipif( - total_memory < 15 or total_disk_space < 35, - reason="Test requires at least 15GB of RAM and 35GB of HDD", -) -def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): - # Define target folder - target_folder = "data/data-cls-cpu-simple-sdxl-turbo-config/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --task classification " - f"--save_dir {target_folder} " - f"--num_objects_range 1 2 " - f"--image_annotator clip " - f"--config ./sample_config.yaml " - f"--device cpu" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 15 or total_disk_space < 35, - reason="Test requires GPU, at least 15GB of RAM and 35GB of HDD", -) -def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): - # Define target folder - target_folder = "data/data-cls-cuda-simple-sdxl-turbo-config/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --task classification " - f"--save_dir {target_folder} " - f"--num_objects_range 1 2 " - f"--image_annotator clip " - f"--config 
./sample_config.yaml " - f"--device cuda" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) + _check_detection_pipeline(cmd, target_folder) \ No newline at end of file diff --git a/tests/heavy_tests/unittests/test_image_generation.py b/tests/heavy_tests/unittests/test_image_generation.py new file mode 100644 index 0000000..482fec2 --- /dev/null +++ b/tests/heavy_tests/unittests/test_image_generation.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from typing import Type, Union + +import psutil +import pytest +import requests +import torch +from PIL import Image + +from datadreamer.image_generation import ( + StableDiffusionImageGenerator, + StableDiffusionLightningImageGenerator, + StableDiffusionTurboImageGenerator, +) + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + +def _check_image_generator( + image_generator_class: Type[ + Union[ + StableDiffusionImageGenerator, + StableDiffusionTurboImageGenerator, + StableDiffusionLightningImageGenerator, + ] + ], + device: str, +): + image_generator = image_generator_class(device=device) + # Check that the image generator is not None + assert image_generator is not None + # Generate images and check each of them + for generated_images_batch in image_generator.generate_images( + ["A photo of a cat, dog"], [["cat", "dog"]] + ): + generated_image = generated_images_batch[0] + assert generated_image is not None + assert isinstance(generated_image, Image.Image) + + images = image_generator.generate_images_batch( + ["A photo of a cat, dog"], + "blurry, bad quality", + ) + assert len(images) == 1 + assert images[0] is not None + assert isinstance(images[0], Image.Image) + + # Release the generator + image_generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory 
< 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", +) +def test_cuda_sdxl_image_generator(): + _check_image_generator(StableDiffusionImageGenerator, "cuda") + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", +) +def test_cpu_sdxl_image_generator(): + _check_image_generator(StableDiffusionImageGenerator, "cpu") + diff --git a/tests/heavy_tests/unittests/test_prompt_generation.py b/tests/heavy_tests/unittests/test_prompt_generation.py new file mode 100644 index 0000000..7ffd077 --- /dev/null +++ b/tests/heavy_tests/unittests/test_prompt_generation.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import psutil +import pytest +import torch + +from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator +from datadreamer.prompt_generation.lm_synonym_generator import LMSynonymGenerator +from datadreamer.prompt_generation.simple_prompt_generator import SimplePromptGenerator +from datadreamer.prompt_generation.tinyllama_lm_prompt_generator import ( + TinyLlamaLMPromptGenerator, +) +from datadreamer.prompt_generation.wordnet_synonym_generator import ( + WordNetSynonymGenerator, +) + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + +def _check_lm_prompt_generator( + device: str, prompt_generator_class=LMPromptGenerator, quantization: str = "none" +): + object_names = ["aeroplane", "bicycle", "bird", "boat"] + prompt_generator = prompt_generator_class( + class_names=object_names, + prompts_number=2, + device=device, + quantization=quantization, + ) + prompts = prompt_generator.generate_prompts() + # Check that the some prompts were generated + assert len(prompts) > 0 + # Iterate through the prompts + for selected_objects, prompt_text in prompts: + # Selected objects aren't empty + 
assert len(selected_objects) > 0 + # The slected objects are in the range + assert ( + prompt_generator.num_objects_range[0] + <= len(selected_objects) + <= prompt_generator.num_objects_range[1] + ) + # Check the generated text + assert len(prompt_text) > 0 and prompt_text.lower().startswith("a photo of") + prompt_generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", +) +def test_cuda_lm_prompt_generator(): + _check_lm_prompt_generator("cuda") + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 35, + reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", +) +def test_cpu_lm_prompt_generator(): + _check_lm_prompt_generator("cpu") + +def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGenerator): + synonyms_num = 3 + generator = synonym_generator_class(synonyms_number=synonyms_num, device=device) + synonyms = generator.generate_synonyms_for_list(["astronaut", "cat", "dog"]) + # Check that the some synonyms were generated + assert len(synonyms) > 0 + # Iterate through the synonyms + for word, synonym_list in synonyms.items(): + # Check that the word is not empty + assert len(word) > 0 + # Check that the synonym list is not empty + assert len(synonym_list) > 0 + # Check that the synonyms are not empty + for synonym in synonym_list: + assert len(synonym) > 0 + generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", +) +def test_cuda_synonym_generator(): + _check_synonym_generator("cuda") + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 35, + reason="Test requires at least 28GB of RAM and 35GB of HDD 
for running on CPU", +) +def test_cpu_synonym_generator(): + _check_synonym_generator("cpu") From 153ac989f202341a530e5780ae358f43d472c40b Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 00:31:34 +0000 Subject: [PATCH 23/31] style: tests formatting --- tests/core_tests/integration/test_pipeline.py | 1 + tests/core_tests/unittests/test_image_generation.py | 5 +++-- tests/core_tests/unittests/test_pipeline_arguments.py | 2 +- tests/core_tests/unittests/test_prompt_generation.py | 5 +++++ tests/heavy_tests/integration/test_pipeline.py | 4 ++-- tests/heavy_tests/unittests/test_image_generation.py | 3 +-- tests/heavy_tests/unittests/test_prompt_generation.py | 10 +++------- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py index 190c0ce..e76f916 100644 --- a/tests/core_tests/integration/test_pipeline.py +++ b/tests/core_tests/integration/test_pipeline.py @@ -43,6 +43,7 @@ def _check_detection_pipeline(cmd: str, target_folder: str): os.path.join(target_folder, "bboxes_visualization") ), "bboxes_visualization directory not created" + # ========================================================= # TEST WITH CONFIG FILE # ========================================================= diff --git a/tests/core_tests/unittests/test_image_generation.py b/tests/core_tests/unittests/test_image_generation.py index 51fda42..2436f75 100644 --- a/tests/core_tests/unittests/test_image_generation.py +++ b/tests/core_tests/unittests/test_image_generation.py @@ -8,12 +8,12 @@ import torch from PIL import Image -from datadreamer.image_generation.clip_image_tester import ClipImageTester from datadreamer.image_generation import ( StableDiffusionImageGenerator, StableDiffusionLightningImageGenerator, StableDiffusionTurboImageGenerator, ) +from datadreamer.image_generation.clip_image_tester import ClipImageTester # Get the total memory in GB total_memory = 
psutil.virtual_memory().total / (1024**3) @@ -21,7 +21,6 @@ total_disk_space = psutil.disk_usage("/").total / (1024**3) - def _check_clip_image_tester(device: str): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) @@ -64,6 +63,7 @@ def test_cuda_clip_image_tester(): def test_cpu_clip_image_tester(): _check_clip_image_tester("cpu") + def _check_image_generator( image_generator_class: Type[ Union[ @@ -96,6 +96,7 @@ def _check_image_generator( # Release the generator image_generator.release(empty_cuda_cache=True if device != "cpu" else False) + @pytest.mark.skipif( not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", diff --git a/tests/core_tests/unittests/test_pipeline_arguments.py b/tests/core_tests/unittests/test_pipeline_arguments.py index 198db72..f435da7 100644 --- a/tests/core_tests/unittests/test_pipeline_arguments.py +++ b/tests/core_tests/unittests/test_pipeline_arguments.py @@ -210,4 +210,4 @@ def test_desc_num_objects_range(): def test_negative_num_objects_range(): # Define the cmd cmd = "datadreamer --num_objects_range -3 1" - _check_wrong_value(cmd) \ No newline at end of file + _check_wrong_value(cmd) diff --git a/tests/core_tests/unittests/test_prompt_generation.py b/tests/core_tests/unittests/test_prompt_generation.py index fca6fc0..f2dcd9f 100644 --- a/tests/core_tests/unittests/test_prompt_generation.py +++ b/tests/core_tests/unittests/test_prompt_generation.py @@ -39,6 +39,7 @@ def test_simple_prompt_generator(): # Check the generated text assert prompt_text == f"A photo of a {', a '.join(selected_objects)}" + def _check_lm_prompt_generator( device: str, prompt_generator_class=LMPromptGenerator, quantization: str = "none" ): @@ -66,6 +67,7 @@ def _check_lm_prompt_generator( assert len(prompt_text) > 0 and prompt_text.lower().startswith("a photo of") prompt_generator.release(empty_cuda_cache=True if device != 
"cpu" else False) + @pytest.mark.skipif( total_memory < 12 or not torch.cuda.is_available() or total_disk_space < 25, reason="Test requires at least 12GB of RAM, 25GB of HDD and CUDA support", @@ -73,6 +75,7 @@ def _check_lm_prompt_generator( def test_cuda_4bit_lm_prompt_generator(): _check_lm_prompt_generator("cuda", quantization="4bit") + @pytest.mark.skipif( total_memory < 12 or total_disk_space < 12, reason="Test requires at least 12GB of RAM and 12GB of HDD for running on CPU", @@ -80,6 +83,7 @@ def test_cuda_4bit_lm_prompt_generator(): def test_cpu_tinyllama_lm_prompt_generator(): _check_lm_prompt_generator("cpu", TinyLlamaLMPromptGenerator) + @pytest.mark.skipif( total_memory < 8 or not torch.cuda.is_available() or total_disk_space < 12, reason="Test requires at least 8GB of RAM, 12GB of HDD and CUDA support", @@ -109,6 +113,7 @@ def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGener def test_cpu_wordnet_synonym_generator(): _check_synonym_generator("cpu", WordNetSynonymGenerator) + @pytest.mark.skipif( not torch.cuda.is_available(), reason="Test requires CUDA support", diff --git a/tests/heavy_tests/integration/test_pipeline.py b/tests/heavy_tests/integration/test_pipeline.py index d368dac..3f48e91 100644 --- a/tests/heavy_tests/integration/test_pipeline.py +++ b/tests/heavy_tests/integration/test_pipeline.py @@ -8,7 +8,7 @@ import torch # Get the total memory in GB -total_memory = psutil.virtual_memory().total / (1024*3) +total_memory = psutil.virtual_memory().total / (1024 * 3) # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1024**3) @@ -847,4 +847,4 @@ def test_cuda_tiny_sdxl_classification_pipeline(): f"--device cuda" ) # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) \ No newline at end of file + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/heavy_tests/unittests/test_image_generation.py b/tests/heavy_tests/unittests/test_image_generation.py 
index 482fec2..30141cc 100644 --- a/tests/heavy_tests/unittests/test_image_generation.py +++ b/tests/heavy_tests/unittests/test_image_generation.py @@ -4,7 +4,6 @@ import psutil import pytest -import requests import torch from PIL import Image @@ -19,6 +18,7 @@ # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1024**3) + def _check_image_generator( image_generator_class: Type[ Union[ @@ -66,4 +66,3 @@ def test_cuda_sdxl_image_generator(): ) def test_cpu_sdxl_image_generator(): _check_image_generator(StableDiffusionImageGenerator, "cpu") - diff --git a/tests/heavy_tests/unittests/test_prompt_generation.py b/tests/heavy_tests/unittests/test_prompt_generation.py index 7ffd077..a943f5c 100644 --- a/tests/heavy_tests/unittests/test_prompt_generation.py +++ b/tests/heavy_tests/unittests/test_prompt_generation.py @@ -6,19 +6,13 @@ from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator from datadreamer.prompt_generation.lm_synonym_generator import LMSynonymGenerator -from datadreamer.prompt_generation.simple_prompt_generator import SimplePromptGenerator -from datadreamer.prompt_generation.tinyllama_lm_prompt_generator import ( - TinyLlamaLMPromptGenerator, -) -from datadreamer.prompt_generation.wordnet_synonym_generator import ( - WordNetSynonymGenerator, -) # Get the total memory in GB total_memory = psutil.virtual_memory().total / (1024**3) # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1024**3) + def _check_lm_prompt_generator( device: str, prompt_generator_class=LMPromptGenerator, quantization: str = "none" ): @@ -54,6 +48,7 @@ def _check_lm_prompt_generator( def test_cuda_lm_prompt_generator(): _check_lm_prompt_generator("cuda") + @pytest.mark.skipif( total_memory < 32 or total_disk_space < 35, reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", @@ -61,6 +56,7 @@ def test_cuda_lm_prompt_generator(): def test_cpu_lm_prompt_generator(): 
_check_lm_prompt_generator("cpu") + def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGenerator): synonyms_num = 3 generator = synonym_generator_class(synonyms_number=synonyms_num, device=device) From 280c9541f5bcd20483d2116e9f46b8642c0b4efb Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 00:35:44 +0000 Subject: [PATCH 24/31] test: rename core tests --- .github/workflows/{core-tests.yaml => tests.yaml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{core-tests.yaml => tests.yaml} (98%) diff --git a/.github/workflows/core-tests.yaml b/.github/workflows/tests.yaml similarity index 98% rename from .github/workflows/core-tests.yaml rename to .github/workflows/tests.yaml index 41c96a9..ae09afd 100644 --- a/.github/workflows/core-tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,4 +1,4 @@ -name: Core tests +name: Tests on: pull_request: @@ -6,7 +6,7 @@ on: paths: - 'datadreamer/**/**.py' - 'tests/core_tests/**/**.py' - - .github/workflows/core-tests.yaml + - .github/workflows/tests.yaml workflow_dispatch: jobs: From b2da07ad681c5cbc3f0eab0b3b066eb2ea41a57a Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 00:43:38 +0000 Subject: [PATCH 25/31] test: run core tests on pull to dev --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index ae09afd..8a831bd 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,7 +2,7 @@ name: Tests on: pull_request: - branches: [ main ] + branches: [ main, dev ] paths: - 'datadreamer/**/**.py' - 'tests/core_tests/**/**.py' From c4dae6e6d47a6609d9ba450ffd1c705240cb1f01 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 01:08:27 +0000 Subject: [PATCH 26/31] test: fix config paths --- .github/workflows/tests.yaml | 2 +- tests/core_tests/integration/test_pipeline.py | 8 ++++---- 2 files 
changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 8a831bd..2fa2ef1 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: os: [buildjet-8vcpu-ubuntu-2204, windows-latest, macOS-latest] - version: ['3.10', '3.11'] + version: ['3.10'] runs-on: ${{ matrix.os }} diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py index e76f916..397143a 100644 --- a/tests/core_tests/integration/test_pipeline.py +++ b/tests/core_tests/integration/test_pipeline.py @@ -58,7 +58,7 @@ def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): cmd = ( f"datadreamer --save_dir {target_folder} " f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " + f"--config ./tests/core_tests/integration/sample_config.yaml " f"--device cpu" ) # Check the run of the pipeline @@ -76,7 +76,7 @@ def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): cmd = ( f"datadreamer --save_dir {target_folder} " f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " + f"--config ./tests/core_tests/integration/sample_config.yaml " f"--device cuda" ) # Check the run of the pipeline @@ -96,7 +96,7 @@ def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): f"--save_dir {target_folder} " f"--num_objects_range 1 2 " f"--image_annotator clip " - f"--config ./sample_config.yaml " + f"--config ./tests/core_tests/integration/sample_config.yaml " f"--device cpu" ) # Check the run of the pipeline @@ -116,7 +116,7 @@ def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): f"--save_dir {target_folder} " f"--num_objects_range 1 2 " f"--image_annotator clip " - f"--config ./sample_config.yaml " + f"--config ./tests/core_tests/integration/sample_config.yaml " f"--device cuda" ) # Check the run of the pipeline From a256cbc10a8c61a57e465d947f6381fa59e9e192 Mon Sep 17 00:00:00 2001 From: GitHub Actions 
Date: Fri, 27 Sep 2024 01:28:16 +0000 Subject: [PATCH 27/31] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 2fad913..6d24dca 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 62% - 62% + 73% + 73% From 9504f9351e6171c513034e49c84b396e0b52e9a2 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 01:46:50 +0000 Subject: [PATCH 28/31] test: update tests --- .github/workflows/tests.yaml | 7 ++- tests/core_tests/integration/test_pipeline.py | 49 +++++++++++++++++++ .../heavy_tests/integration/test_pipeline.py | 44 ----------------- 3 files changed, 54 insertions(+), 46 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 2fa2ef1..7553b16 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -15,7 +15,10 @@ jobs: fail-fast: false matrix: os: [buildjet-8vcpu-ubuntu-2204, windows-latest, macOS-latest] - version: ['3.10'] + version: ['3.10', '3.11'] + exclude: + - os: buildjet-8vcpu-ubuntu-2204 + version: '3.11' runs-on: ${{ matrix.os }} @@ -94,7 +97,7 @@ jobs: publish-test-results: name: "Publish Tests Results" needs: run_tests - runs-on: buildjet-8vcpu-ubuntu-2204 + runs-on: ubuntu-latest permissions: checks: write pull-requests: write diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py index 397143a..a6eba19 100644 --- a/tests/core_tests/integration/test_pipeline.py +++ b/tests/core_tests/integration/test_pipeline.py @@ -44,6 +44,55 @@ def _check_detection_pipeline(cmd: str, target_folder: str): ), "bboxes_visualization directory not created" +# ========================================================= +# DETECTION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or 
total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + # ========================================================= # TEST WITH CONFIG FILE # ========================================================= diff --git a/tests/heavy_tests/integration/test_pipeline.py b/tests/heavy_tests/integration/test_pipeline.py index 3f48e91..03750ea 100644 --- a/tests/heavy_tests/integration/test_pipeline.py +++ b/tests/heavy_tests/integration/test_pipeline.py @@ -181,50 +181,6 @@ def test_cuda_simple_sdxl_detection_pipeline(): _check_detection_pipeline(cmd, target_folder) -@pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of 
RAM and 35GB of HDD", -) -def test_cpu_simple_sdxl_lightning_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cpu-simple-sdxl-lightning/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " - f"--prompts_number 1 " - f"--prompt_generator simple " - f"--num_objects_range 1 2 " - f"--image_generator sdxl-lightning " - f"--use_image_tester " - f"--device cpu" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", -) -def test_cuda_simple_sdxl_lightning_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cuda-simple-sdxl-lightning/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " - f"--prompts_number 1 " - f"--prompt_generator simple " - f"--num_objects_range 1 2 " - f"--image_generator sdxl-lightning " - f"--use_image_tester " - f"--device cuda" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - # ========================================================= # DETECTION - LLM # ========================================================= From c16f5d744b1532f33268694d95d564af655ab8f4 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Fri, 27 Sep 2024 01:59:30 +0000 Subject: [PATCH 29/31] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 6d24dca..2fad913 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 73% - 73% + 62% + 62% From ba9fb3bda857d017aa65bbbd9e52c107bce09ebb Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin 
Date: Fri, 27 Sep 2024 12:58:25 +0000 Subject: [PATCH 30/31] test: run core tests on pr to main --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 7553b16..6f964ac 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,7 +2,7 @@ name: Tests on: pull_request: - branches: [ main, dev ] + branches: [ main ] paths: - 'datadreamer/**/**.py' - 'tests/core_tests/**/**.py' From 8de4fe9cc9bb3fb389a83aa141cb4d65349b681b Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 27 Sep 2024 12:59:46 +0000 Subject: [PATCH 31/31] test: rename heavy test scripts --- .../integration/{test_pipeline.py => test_pipeline_heavy.py} | 0 .../{test_image_generation.py => test_image_generation_heavy.py} | 0 ...{test_prompt_generation.py => test_prompt_generation_heavy.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/heavy_tests/integration/{test_pipeline.py => test_pipeline_heavy.py} (100%) rename tests/heavy_tests/unittests/{test_image_generation.py => test_image_generation_heavy.py} (100%) rename tests/heavy_tests/unittests/{test_prompt_generation.py => test_prompt_generation_heavy.py} (100%) diff --git a/tests/heavy_tests/integration/test_pipeline.py b/tests/heavy_tests/integration/test_pipeline_heavy.py similarity index 100% rename from tests/heavy_tests/integration/test_pipeline.py rename to tests/heavy_tests/integration/test_pipeline_heavy.py diff --git a/tests/heavy_tests/unittests/test_image_generation.py b/tests/heavy_tests/unittests/test_image_generation_heavy.py similarity index 100% rename from tests/heavy_tests/unittests/test_image_generation.py rename to tests/heavy_tests/unittests/test_image_generation_heavy.py diff --git a/tests/heavy_tests/unittests/test_prompt_generation.py b/tests/heavy_tests/unittests/test_prompt_generation_heavy.py similarity index 100% rename from 
tests/heavy_tests/unittests/test_prompt_generation.py rename to tests/heavy_tests/unittests/test_prompt_generation_heavy.py