diff --git a/README.md b/README.md index 37f9af4..c3b9464 100644 --- a/README.md +++ b/README.md @@ -157,13 +157,13 @@ datadreamer --config ### 🔧 Additional Parameters -- `--task`: Choose between detection and classification. Default is `detection`. +- `--task`: Choose between `detection`, `classification` and `instance-segmentation`. Default is `detection`. - `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`. - `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`. - `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3. - `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`. - `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`. -- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`. +- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection, `clip` for image classification, or `owlv2-slimsam` for instance segmentation. Default is `owlv2`. - `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`. - `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`. - `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `""`. 
@@ -199,6 +199,7 @@ datadreamer --config | | [SDXL-Lightning](https://huggingface.co/ByteDance/SDXL-Lightning) | Fast and accurate (1024x1024 images) | | Image Annotation | [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Vocabulary object detector | | | [CLIP](https://huggingface.co/openai/clip-vit-base-patch32) | Zero-shot-image-classification | +| | [SlimSAM](https://huggingface.co/Zigeng/SlimSAM-uniform-50) | Zero-shot-instance-segmentation | @@ -271,6 +272,23 @@ save_dir/ } ``` +3. Instance Segmentation Annotations (instance_segmentation_annotations.json): + +- Each entry corresponds to an image and contains bounding boxes, masks (polygons given as lists of `[x, y]` vertices) and labels for objects in the image. +- Format: + +```bash +{ + "image_path": { + "boxes": [[x_min, y_min, x_max, y_max], ...], + "masks": [[[x0, y0], [x1, y1], ...], [[x0, y0], [x1, y1], ...], ...], + "labels": [label_index, ...] + }, + ... + "class_names": ["class1", "class2", ...] +} +``` + ## ⚠️ Limitations diff --git a/datadreamer/dataset_annotation/__init__.py b/datadreamer/dataset_annotation/__init__.py index f4da035..cfdf51a 100644 --- a/datadreamer/dataset_annotation/__init__.py +++ b/datadreamer/dataset_annotation/__init__.py @@ -3,5 +3,12 @@ from .clip_annotator import CLIPAnnotator from .image_annotator import BaseAnnotator, TaskList from .owlv2_annotator import OWLv2Annotator +from .slimsam_annotator import SlimSAMAnnotator -__all__ = ["BaseAnnotator", "TaskList", "OWLv2Annotator", "CLIPAnnotator"] +__all__ = [ + "BaseAnnotator", + "TaskList", + "OWLv2Annotator", + "CLIPAnnotator", + "SlimSAMAnnotator", +] diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 89d4023..9da41b4 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -98,7 +98,7 @@ def _generate_annotations( """ n = len(images) batched_prompts = [prompts] * n - target_sizes = 
torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) + target_sizes = torch.Tensor([img.size[::-1] for img in images]).to(self.device) # resize the images to the model's input size img_size = (1008, 1008) if self.size == "large" else (960, 960) @@ -121,7 +121,8 @@ def _get_annotations( self, pred: Dict[str, torch.Tensor], use_tta: bool, - img_dim: int, + img_width: int, + img_height: int, synonym_dict: Dict[str, List[str]] | None, synonym_dict_rev: Dict[int, int] | None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: @@ -130,7 +131,8 @@ def _get_annotations( Args: pred: The predictions from the model. use_tta (bool): Flag to whether the test-time augmentation was applied. - img_dim (int): The dimension of the image. + img_width (int): The width of the image. + img_height (int): The height of the image. synonym_dict (dict): Dictionary for handling synonyms in labels. synonym_dict_rev (dict): Dictionary for handling synonyms in labels. @@ -143,17 +145,44 @@ def _get_annotations( pred["scores"], pred["labels"], ) - # Flip boxes back if using TTA - if use_tta: - boxes[:, [0, 2]] = img_dim - boxes[:, [2, 0]] if synonym_dict is not None: labels = torch.tensor( [synonym_dict_rev[label.item()] for label in labels], dtype=torch.int64 ) + boxes = self._correct_bboxes_misalignment(boxes, img_width, img_height) + + # Flip boxes back if using TTA + if use_tta: + boxes[:, [0, 2]] = img_width - boxes[:, [2, 0]] + return boxes, scores, labels + def _correct_bboxes_misalignment( + self, input_boxes: torch.Tensor, width: int, height: int + ) -> List[torch.Tensor]: + """This function corrects the bounding boxes misalignment appearing when using + the `transformers==4.45.2`. + + Problem description: With a non-square aspect ratio, the predictions are shifted in the smaller dimension. + Solution: https://discuss.huggingface.co/t/owl-v2-bounding-box-misalignment-problem/66181 + + Args: + input_boxes (torch.Tensor): The bounding boxes to be corrected. 
+ width (int): The width of the image. + height (int): The height of the image. + + Returns: + List[torch.Tensor]: The corrected bounding boxes. + """ + width_ratio = width / height if width < height else 1 + height_ratio = height / width if height < width else 1 + ratios = torch.tensor( + [width_ratio, height_ratio] * 2, device=input_boxes.device + ) + return input_boxes * ratios + def annotate_batch( self, images: List[PIL.Image.Image], @@ -206,31 +235,34 @@ def annotate_batch( final_labels = [] for i, (pred, aug_pred) in enumerate(zip(preds, augmented_preds)): + img_width, img_height = images[i].size boxes, scores, labels = self._get_annotations( pred, False, - images[i].size[0], + img_width, + img_height, synonym_dict, synonym_dict_rev if synonym_dict is not None else None, ) - all_boxes = [boxes.to("cpu")] - all_scores = [scores.to("cpu")] - all_labels = [labels.to("cpu")] + all_boxes = [boxes.cpu()] + all_scores = [scores.cpu()] + all_labels = [labels.cpu()] # Flip boxes back if using TTA if use_tta: aug_boxes, aug_scores, aug_labels = self._get_annotations( aug_pred, True, - images[i].size[0], + img_width, + img_height, synonym_dict, synonym_dict_rev if synonym_dict is not None else None, ) - all_boxes.append(aug_boxes.to("cpu")) - all_scores.append(aug_scores.to("cpu")) - all_labels.append(aug_labels.to("cpu")) + all_boxes.append(aug_boxes.cpu()) + all_scores.append(aug_scores.cpu()) + all_labels.append(aug_labels.cpu()) one_hot_labels = torch.nn.functional.one_hot( torch.cat(all_labels), num_classes=len(prompts) @@ -294,8 +326,8 @@ def release(self, empty_cuda_cache: bool = False) -> None: url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) - annotator = OWLv2Annotator(device="cpu", size="large") + annotator = OWLv2Annotator(device="cpu", size="base") final_boxes, final_scores, final_labels = annotator.annotate_batch( - [im], ["robot", "horse"] + [im], ["bus", "person"] ) annotator.release() diff --git 
a/datadreamer/dataset_annotation/slimsam_annotator.py b/datadreamer/dataset_annotation/slimsam_annotator.py new file mode 100644 index 0000000..7e4e7cd --- /dev/null +++ b/datadreamer/dataset_annotation/slimsam_annotator.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import logging +from typing import List + +import numpy as np +import PIL +import torch +from transformers import SamModel, SamProcessor + +from datadreamer.dataset_annotation.image_annotator import BaseAnnotator +from datadreamer.dataset_annotation.utils import mask_to_polygon + +logger = logging.getLogger(__name__) + + +class SlimSAMAnnotator(BaseAnnotator): + """A class for image annotation using the SlimSAM model, specializing in instance + segmentation. + + Attributes: + model (SAM): The SAM model for instance segmentation. + processor (SamProcessor): The processor for the SAM model. + device (str): The device on which the model will run ('cuda' for GPU, 'cpu' for CPU). + size (str): The size of the SAM model to use ('base' or 'large'). + + Methods: + _init_model(): Initializes the SAM model. + _init_processor(): Initializes the processor for the SAM model. + annotate_batch(image, prompts, conf_threshold, use_tta, synonym_dict): Annotates the given image with bounding boxes and labels. + release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. + """ + + def __init__( + self, + seed: float = 42, + device: str = "cuda", + size: str = "base", + ) -> None: + """Initializes the SAMAnnotator with a specific seed and device. + + Args: + seed (float): Seed for reproducibility. Defaults to 42. + device (str): The device to run the model on. Defaults to 'cuda'. + """ + super().__init__(seed) + self.size = size + self.model = self._init_model() + self.processor = self._init_processor() + self.device = device + self.model.to(self.device) + + def _init_model(self) -> SamModel: + """Initializes the SAM model for object detection. 
+ + Returns: + SamModel: The initialized SAM model. + """ + logger.info(f"Initializing `SlimSAM {self.size} model...") + if self.size == "large": + return SamModel.from_pretrained("Zigeng/SlimSAM-uniform-50") + return SamModel.from_pretrained("Zigeng/SlimSAM-uniform-77") + + def _init_processor(self) -> SamProcessor: + """Initializes the processor for the SAM model. + + Returns: + SamProcessor: The initialized processor. + """ + if self.size == "large": + return SamProcessor.from_pretrained("Zigeng/SlimSAM-uniform-50") + return SamProcessor.from_pretrained("Zigeng/SlimSAM-uniform-77") + + def annotate_batch( + self, + images: List[PIL.Image.Image], + boxes_batch: List[np.ndarray], + iou_threshold: float = 0.2, + ) -> List[List[List[float]]]: + """Annotates images for the task of instance segmentation using the SlimSAM + model. + + Args: + images: The images to be annotated. + boxes_batch: The bounding boxes of found objects. + iou_threshold (float, optional): Intersection over union threshold for non-maximum suppression. Defaults to 0.2. + + Returns: + List: A list containing the final segment masks represented as a polygon. 
+ """ + final_segments = [] + + n = len(images) + + for i in range(n): + boxes = boxes_batch[i].tolist() + if len(boxes) == 0: + final_segments.append([]) + continue + + inputs = self.processor( + images[i], input_boxes=[boxes], return_tensors="pt" + ).to(self.device) + + with torch.no_grad(): + outputs = self.model(**inputs, return_dict=True) + + masks = self.processor.image_processor.post_process_masks( + outputs.pred_masks.cpu(), + inputs["original_sizes"].cpu(), + inputs["reshaped_input_sizes"].cpu(), + )[0] + + iou_scores = outputs.iou_scores.cpu() + + image_masks = [] + for j in range(len(boxes)): + keep_idx = iou_scores[0, j] >= iou_threshold + filtered_masks = masks[j, keep_idx].cpu().float() + final_masks = filtered_masks.permute(1, 2, 0) + final_masks = final_masks.mean(axis=-1) + final_masks = (final_masks > 0).int() + final_masks = final_masks.numpy().astype(np.uint8) + polygon = mask_to_polygon(final_masks) + if len(polygon) != 0: + image_masks.append(polygon) + + final_segments.append(image_masks) + + return final_segments + + def release(self, empty_cuda_cache: bool = False) -> None: + """Releases the model and optionally empties the CUDA cache. + + Args: + empty_cuda_cache (bool, optional): Whether to empty the CUDA cache. Defaults to False. 
+ """ + self.model = self.model.to("cpu") + if empty_cuda_cache: + with torch.no_grad(): + torch.cuda.empty_cache() + + +if __name__ == "__main__": + import requests + from PIL import Image + + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = SlimSAMAnnotator(device="cpu", size="large") + final_segments = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + print(len(final_segments), len(final_segments[0])) + print(final_segments[0][0][:5]) diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index bfb13b7..dd6b643 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -2,6 +2,8 @@ from typing import List +import cv2 +import numpy as np from torchvision import transforms @@ -32,3 +34,27 @@ def apply_tta(image) -> List[transforms.Compose]: augmented_images = [t(image) for t in tta_transforms] return augmented_images + + +def mask_to_polygon(mask: np.ndarray) -> List[List[int]]: + """Converts a binary mask to a polygon. + + Args: + mask: The binary mask to be converted. + + Returns: + List: A list of vertices of the polygon. 
+ """ + # Find contours in the binary mask + contours, _ = cv2.findContours( + mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + if len(contours) == 0: + return [] + # Find the contour with the largest area + largest_contour = max(contours, key=cv2.contourArea) + + # Extract the vertices of the contour + polygon = largest_contour.reshape(-1, 2).tolist() + + return polygon diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 33811bf..81908a5 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -16,7 +16,11 @@ from PIL import Image from tqdm import tqdm -from datadreamer.dataset_annotation import CLIPAnnotator, OWLv2Annotator +from datadreamer.dataset_annotation import ( + CLIPAnnotator, + OWLv2Annotator, + SlimSAMAnnotator, +) from datadreamer.image_generation import ( StableDiffusionImageGenerator, StableDiffusionLightningImageGenerator, @@ -54,6 +58,8 @@ det_annotators = {"owlv2": OWLv2Annotator} clf_annotators = {"clip": CLIPAnnotator} +inst_seg_annotators = {"owlv2-slimsam": SlimSAMAnnotator} +inst_seg_detectors = {"owlv2-slimsam": OWLv2Annotator} setup_logging(use_rich=True) @@ -70,7 +76,7 @@ def parse_args(): parser.add_argument( "--task", type=str, - choices=["detection", "classification"], + choices=["detection", "classification", "instance-segmentation"], help="Task to generate data for", ) @@ -116,7 +122,7 @@ def parse_args(): parser.add_argument( "--image_annotator", type=str, - choices=["owlv2", "clip"], + choices=["owlv2", "clip", "owlv2-slimsam"], help="Image annotator to use", ) @@ -357,6 +363,14 @@ def check_args(args): "--image_annotator must be one of the available annotators for classification task" ) + if ( + args.task == "instance-segmentation" + and args.image_annotator not in inst_seg_annotators + ): + raise ValueError( + "--image_annotator must be one of the 
available annotators for instance segmentation task" + ) + # Check coorect task and dataset_format if args.task == "classification" and args.dataset_format in ["coco", "yolo"]: raise ValueError( @@ -368,6 +382,11 @@ def check_args(args): "--dataset_format must be one of the available dataset formats for detection task: raw, coco, yolo, luxonis-dataset" ) + if args.task == "instance-segmentation" and args.dataset_format in ["cls-single"]: + raise ValueError( + "--dataset_format must be one of the available dataset formats for instance segmentation task: raw, coco, yolo, luxonis-dataset" + ) + # Check split_ratios if ( len(args.split_ratios) != 3 @@ -540,6 +559,7 @@ def read_image_batch(image_batch, batch_num, batch_size): boxes_list = [] scores_list = [] labels_list = [] + segment_list = [] image_paths = [] if args.task == "classification": @@ -583,7 +603,14 @@ def read_image_batch(image_batch, batch_num, batch_size): ) else: # Detection annotation - annotator_class = det_annotators[args.image_annotator] + if args.task == "detection": + annotator_class = det_annotators[args.image_annotator] + else: + annotator_class = inst_seg_detectors[args.image_annotator] + inst_seg_annotator_class = inst_seg_annotators[args.image_annotator] + inst_seg_annotator = inst_seg_annotator_class( + device=args.device, size=args.annotator_size + ) annotator = annotator_class(device=args.device, size=args.annotator_size) for i, image_batch in tqdm( @@ -608,14 +635,31 @@ def read_image_batch(image_batch, batch_num, batch_size): boxes_list.extend(boxes_batch) scores_list.extend(scores_batch) + if args.task == "instance-segmentation": + masks_batch = inst_seg_annotator.annotate_batch( + images=images, + boxes_batch=boxes_batch, + iou_threshold=args.annotation_iou_threshold, + ) + segment_list.extend(masks_batch) + for j, image in enumerate(images): labels = [] # Save bbox visualizations fig, ax = plt.subplots(1) ax.imshow(image) - for box, score, label in zip( - boxes_batch[j], 
scores_batch[j], local_labels_batch[j] - ): + for k in range(len(boxes_batch[j])): + box = boxes_batch[j][k] + score = scores_batch[j][k] + label = local_labels_batch[j][k] + + if args.task == "instance-segmentation": + if k < len(masks_batch[j]): + mask = masks_batch[j][k] + x_points, y_points = zip(*mask) + + ax.fill(x_points, y_points, label, alpha=0.5) + labels.append(label) x1, y1, x2, y2 = box rect = patches.Rectangle( @@ -658,6 +702,7 @@ def read_image_batch(image_batch, batch_num, batch_size): image_paths=image_paths, labels_list=labels_list, boxes_list=boxes_list, + masks_list=segment_list if len(segment_list) > 0 else None, class_names=args.class_names, save_dir=save_dir, ) @@ -670,6 +715,7 @@ def read_image_batch(image_batch, batch_num, batch_size): "yolo", args.split_ratios, copy_files=False, + is_instance_segmentation=args.task == "instance-segmentation", seed=args.seed, ) # Convert annotations to COCO format @@ -679,6 +725,7 @@ def read_image_batch(image_batch, batch_num, batch_size): args.save_dir, "coco", args.split_ratios, + is_instance_segmentation=args.task == "instance-segmentation", copy_files=False, seed=args.seed, ) @@ -692,6 +739,7 @@ def read_image_batch(image_batch, batch_num, batch_size): args.split_ratios, dataset_plugin=args.dataset_plugin, dataset_name=args.dataset_name, + is_instance_segmentation=args.task == "instance-segmentation", copy_files=False, seed=args.seed, ) diff --git a/datadreamer/utils/coco_converter.py b/datadreamer/utils/coco_converter.py index bcd3546..bb69a78 100644 --- a/datadreamer/utils/coco_converter.py +++ b/datadreamer/utils/coco_converter.py @@ -4,6 +4,7 @@ import os import shutil +import numpy as np from PIL import Image from datadreamer.utils.base_converter import BaseConverter @@ -28,8 +29,9 @@ class COCOConverter(BaseConverter): │ ├── labels.json """ - def __init__(self, seed=42): + def __init__(self, seed=42, is_instance_segmentation: bool = False): super().__init__(seed) + self.is_instance_segmentation = 
is_instance_segmentation def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True) -> None: """Converts a dataset into a COCO format. @@ -99,19 +101,36 @@ def process_data( "height": image_height, } ) + masks = ( + annotation.get("masks") + if self.is_instance_segmentation + else [None] * len(annotation["boxes"]) + ) + + # Loop through boxes, labels, and masks, appending to annotations + for box, label, mask in zip( + annotation["boxes"], annotation["labels"], masks + ): + bbox = [box[0], box[1], box[2] - box[0], box[3] - box[1]] + segmentation = ( + np.array(mask).reshape(1, -1).tolist() + if mask is not None + else None + ) + area = (box[2] - box[0]) * (box[3] - box[1]) - for box, label in zip(annotation["boxes"], annotation["labels"]): annotations.append( { "id": annotation_id, "image_id": len(images_info), "category_id": label, - "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], - "segmentation": None, # [[box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]], # bbox mask - "area": (box[2] - box[0]) * (box[3] - box[1]), + "bbox": bbox, + "segmentation": segmentation, + "area": area, "iscrowd": 0, } ) + annotation_id += 1 if copy_files: diff --git a/datadreamer/utils/config.py b/datadreamer/utils/config.py index c114321..9d36267 100644 --- a/datadreamer/utils/config.py +++ b/datadreamer/utils/config.py @@ -10,7 +10,7 @@ class Config(LuxonisConfig): save_dir: str = "generated_dataset" class_names: List[str] = ["bear", "bicycle", "bird", "person"] prompts_number: int = 10 - task: Literal["detection", "classification"] = "detection" + task: Literal["detection", "classification", "instance-segmentation"] = "detection" seed: int = 42 device: Literal["cuda", "cpu"] = "cuda" annotate_only: bool = False @@ -39,7 +39,7 @@ class Config(LuxonisConfig): # Profanity filter arguments disable_lm_filter: bool = False # Annotation arguments - image_annotator: Literal["owlv2", "clip"] = "owlv2" + image_annotator: Literal["owlv2", "clip", 
"owlv2-slimsam"] = "owlv2" conf_threshold: float = 0.15 annotation_iou_threshold: float = 0.2 use_tta: bool = False diff --git a/datadreamer/utils/convert_dataset.py b/datadreamer/utils/convert_dataset.py index 874878b..2e063ed 100644 --- a/datadreamer/utils/convert_dataset.py +++ b/datadreamer/utils/convert_dataset.py @@ -17,6 +17,7 @@ def convert_dataset( split_ratios, dataset_plugin=None, dataset_name=None, + is_instance_segmentation=False, copy_files=True, seed=42, ) -> None: @@ -36,14 +37,19 @@ def convert_dataset( """ if dataset_format == "yolo": - converter = YOLOConverter(seed=seed) + converter = YOLOConverter( + seed=seed, is_instance_segmentation=is_instance_segmentation + ) elif dataset_format == "coco": - converter = COCOConverter(seed=seed) + converter = COCOConverter( + seed=seed, is_instance_segmentation=is_instance_segmentation + ) elif dataset_format == "luxonis-dataset": converter = LuxonisDatasetConverter( dataset_plugin=dataset_plugin, dataset_name=dataset_name, seed=seed, + is_instance_segmentation=is_instance_segmentation, ) elif dataset_format == "cls-single": converter = SingleLabelClsConverter(seed=seed) diff --git a/datadreamer/utils/dataset_utils.py b/datadreamer/utils/dataset_utils.py index 33fe003..a1c5971 100644 --- a/datadreamer/utils/dataset_utils.py +++ b/datadreamer/utils/dataset_utils.py @@ -6,6 +6,7 @@ def save_annotations_to_json( image_paths, labels_list, boxes_list=None, + masks_list=None, class_names=None, save_dir=None, file_name="annotations.json", @@ -16,6 +17,7 @@ def save_annotations_to_json( image_paths (list): List of image paths. labels_list (list): List of labels. boxes_list (list, optional): List of bounding boxes. Defaults to None. + masks_list (list, optional): List of instance segmentation masks. Defaults to None. class_names (list, optional): List of class names. Defaults to None. save_dir (str, optional): Directory to save the JSON file. Defaults to None. file_name (str, optional): Name of the JSON file. 
Defaults to 'annotations.json'. @@ -38,6 +40,10 @@ def save_annotations_to_json( bboxes = boxes_list[i] annotations[image_name]["boxes"] = bboxes.tolist() + if masks_list is not None: + masks = masks_list[i] + annotations[image_name]["masks"] = masks + annotations["class_names"] = class_names # Save to JSON file diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index 9a2e6f9..8462ea1 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -17,12 +17,22 @@ class LuxonisDatasetConverter(BaseConverter): """Class for converting a dataset to LuxonisDataset format.""" def __init__( - self, dataset_plugin: str = None, dataset_name: str = None, seed: int = 42 + self, + dataset_plugin: str = None, + dataset_name: str = None, + seed: int = 42, + is_instance_segmentation: bool = False, ): super().__init__(seed) + self.is_instance_segmentation = is_instance_segmentation self.dataset_plugin = dataset_plugin self.dataset_name = dataset_name + if self.is_instance_segmentation: + logger.warning( + "Instance segmentation will be treated as semantic segmentation until the support for instance segmentation is added to Luxonis-ml." 
+ ) + def convert( self, dataset_dir: str, @@ -80,6 +90,22 @@ def dataset_generator(): }, } + if "masks" in data[image_path]: # polyline format + masks = data[image_path]["masks"] + for mask, label in zip(masks, labels): + poly = [] + poly += [ + (point[0] / width, point[1] / height) for point in mask + ] + yield { + "file": image_full_path, + "annotation": { + "type": "polyline", + "class": class_names[label], + "points": poly, # masks, + }, + } + if "boxes" in data[image_path]: boxes = data[image_path]["boxes"] for box, label in zip(boxes, labels): diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py index 715e429..5f8fc51 100644 --- a/datadreamer/utils/yolo_converter.py +++ b/datadreamer/utils/yolo_converter.py @@ -4,6 +4,7 @@ import shutil from typing import Dict, List +import numpy as np from PIL import Image from datadreamer.utils import BaseConverter @@ -30,8 +31,9 @@ class YOLOConverter(BaseConverter): │ ├── labels """ - def __init__(self, seed=42): + def __init__(self, seed=42, is_instance_segmentation: bool = False): super().__init__(seed) + self.is_instance_segmentation = is_instance_segmentation def convert( self, @@ -74,6 +76,21 @@ def convert_to_yolo_format( height = (box[3] - box[1]) / image_height return [x_center, y_center, width, height] + def convert_masks_to_yolo_format( + self, masks: List[List[float]], w: int, h: int + ) -> List[float]: + """Converts masks to YOLO format. + + Args: + masks (list of list of float): A list containing the masks. + w (int): The width of the image. + h (int): The height of the image. + + Returns: + list of float: A list containing the masks in YOLO format. 
+ """ + return (np.array(masks) / np.array([w, h])).reshape(-1).tolist() + def process_data( self, data: Dict, @@ -130,11 +147,22 @@ def process_data( label_output_dir, os.path.splitext(image_name)[0] + ".txt" ) with open(label_file, "w") as f: - for box, label in zip(annotation["boxes"], annotation["labels"]): - yolo_box = self.convert_to_yolo_format( - box, image_width, image_height - ) - f.write(f"{label} {' '.join(map(str, yolo_box))}\n") + if self.is_instance_segmentation: + for masks, label in zip( + annotation["masks"], annotation["labels"] + ): + yolo_box = self.convert_masks_to_yolo_format( + masks, image_width, image_height + ) + f.write(f"{label} {' '.join(map(str, yolo_box))}\n") + else: + for box, label in zip( + annotation["boxes"], annotation["labels"] + ): + yolo_box = self.convert_to_yolo_format( + box, image_width, image_height + ) + f.write(f"{label} {' '.join(map(str, yolo_box))}\n") if copy_files: shutil.copy( diff --git a/examples/generate_dataset_and_train_yolo.ipynb b/examples/generate_dataset_and_train_yolo.ipynb index 4f5cc17..9ad1359 100644 --- a/examples/generate_dataset_and_train_yolo.ipynb +++ b/examples/generate_dataset_and_train_yolo.ipynb @@ -78,13 +78,13 @@ "- `--class_names` (required): Space-separated list of object names for image generation and annotation. Example: `person moon robot`.\n", "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to `10`.\n", "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", - "- `--task`: Choose between detection and classification. Default is `detection`.\n", + "- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`.\n", "- `--dataset_format`: Format of the dataset. Defaults to `raw`. 
Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", - "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection, `clip` for image classification, or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", @@ -96,6 +96,7 @@ "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", + "- `--disable_lm_filter`: Use only a bad word list for profanity filtering. Default is `False`.\n", "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", "- `--batch_size_image`: Batch size for image generation. 
Default is `1`.\n", diff --git a/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb b/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb new file mode 100644 index 0000000..11f9812 --- /dev/null +++ b/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8ce1517f-7258-406d-9139-9adadb1a1570", + "metadata": { + "id": "8ce1517f-7258-406d-9139-9adadb1a1570" + }, + "source": [ + "\n", + "\n", + "# DataDreamer Tutorial: Generating a dataset for instance segmentation, training a model, and deploying it to the OAK (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5_2ivH03etO", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b5_2ivH03etO", + "outputId": "c92b1e2e-cd3e-4a7d-8be6-776e0dfad5bc" + }, + "outputs": [], + "source": [ + "!pip install -q datadreamer@git+https://github.com/luxonis/datadreamer@dev" + ] + }, + { + "cell_type": "markdown", + "id": "c3704c07", + "metadata": { + "id": "c3704c07" + }, + "source": [ + "## Generate a dataset with your own classes (might take some time to download all models)" + ] + }, + { + "cell_type": "markdown", + "id": "M4v-QieP4tXL", + "metadata": { + "id": "M4v-QieP4tXL" + }, + "source": [ + "Make sure you are using the GPU runtime type (in Google Colab).\n", + "\n", + "~4 min to generate 30 images\n", + "\n", + "~43 secs to annotate them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab1e2f9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6ab1e2f9", + "outputId": "6f57eb7a-f261-46bc-e574-3631cade8660", + "scrolled": true + }, + "outputs": [], + "source": [ + "!datadreamer --save_dir generated_dataset \\\n", + " --class_names tractor horse bear \\\n", + " --prompts_number 30 \\\n", + " --prompt_generator simple \\\n", + " --num_objects_range 1 1 \\\n", + " --image_generator 
sdxl-turbo \\\n", + " --task instance-segmentation \\\n", + " --disable_lm_filter \\\n", + " --annotator_size base \\\n", + " --use_tta \\\n", + " --image_annotator owlv2-slimsam \\\n", + " --conf_threshold 0.2 \\\n", + " --seed 42" + ] + }, + { + "cell_type": "markdown", + "id": "7a10755e", + "metadata": { + "id": "7a10755e" + }, + "source": [ + "### Parameters\n", + "- `--save_dir` (required): Path to the directory for saving generated images and annotations.\n", + "- `--class_names` (required): Space-separated list of object names for image generation and annotation. Example: `person moon robot`.\n", + "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to `10`.\n", + "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", + "- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`.\n", + "- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", + "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", + "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", + "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", + "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", + "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", + "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. 
Default is `0.2`.\n", + "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", + "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `\", hd, 8k, highly detailed\"`.\n", + "- `--negative_prompt`: Negative prompts to guide the generation away from certain features. Default is `\"cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy\"`.\n", + "- `--use_tta`: Toggle test time augmentation for object detection. Default is `False`.\n", + "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", + "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", + "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", + "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", + "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", + "- `--disable_lm_filter`: Use only a bad word list for profanity filtering. Default is `False`.\n", + "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", + "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", + "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", + "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", + "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n", + "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7add74d9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 497 + }, + "id": "7add74d9", + "outputId": "cafd066a-b524-4006-e2d0-cd949d65c567" + }, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from IPython.display import Image\n", + "\n", + "Image(filename=os.path.join(\"generated_dataset/bboxes_visualization\", \"bbox_5.jpg\"))" + ] + }, + { + "cell_type": "markdown", + "id": "64fe2dc9", + "metadata": { + "id": "64fe2dc9" + }, + "source": [ + "## Convert the dataset to YOLO format" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3dd01a6a", + "metadata": { + "id": "3dd01a6a" + }, + "outputs": [], + "source": [ + "from datadreamer.utils.convert_dataset import convert_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9b9bb74d", + "metadata": { + "id": "9b9bb74d" + }, + "outputs": [], + "source": [ + "convert_dataset(\n", + " input_dir=\"generated_dataset\",\n", + " output_dir=\"generated_dataset_yolo\",\n", + " dataset_format=\"yolo\",\n", + " split_ratios=[0.8, 0.1, 0.1],\n", + " copy_files=True,\n", + " is_instance_segmentation=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a167a842", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a167a842", + "outputId": "715988c2-ab27-4ce2-b12c-2fa01188c537" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data.yaml test train\tval\n" + ] + } + ], + "source": [ + "!ls generated_dataset_yolo" + ] + }, + { + "cell_type": "markdown", + "id": "d2d660b0", + "metadata": { + "id": "d2d660b0" + }, + "source": 
[ + "# Train your model (YOLOv8 as an example)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "982e475e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "982e475e", + "outputId": "1f4cb9f5-1d01-4882-a730-434e5122546f", + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install -q ultralytics" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "184cf0fa", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "184cf0fa", + "outputId": "dcc43a26-bc78-4d3d-ddb3-6932a8584df9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating new Ultralytics Settings v0.0.6 file ✅ \n", + "View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'\n", + "Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.\n", + "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6.74M/6.74M [00:00<00:00, 110MB/s]\n" + ] + } + ], + "source": [ + "from ultralytics import YOLO\n", + "\n", + "model = YOLO(\"yolov8n-seg.pt\") # load a pretrained model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb4e6754", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "bb4e6754", + "outputId": "66b28d5a-6544-46fa-ee73-3074f141e981", + "scrolled": true + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['WANDB_DISABLED'] = 'true'\n", + "\n", + "results = model.train(data=\"generated_dataset_yolo/data.yaml\", epochs=200)" + ] + }, + { + "cell_type": "markdown", + "id": "d8b05e33", + "metadata": { + "id": "d8b05e33" + }, + "source": [ + "## Show the predictions" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 10, + "id": "b559b1f9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "b559b1f9", + "outputId": "bcb3fae6-27eb-4384-f532-c573ae45c599" + }, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(filename=os.path.join(results.save_dir, \"val_batch0_pred.jpg\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dec0cb11", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dec0cb11", + "outputId": "72cf4330-fa0f-47aa-82c5-242dc6978dcd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ultralytics 8.3.21 🚀 Python-3.10.12 torch-2.5.0+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLOv8n-seg summary (fused): 195 layers, 3,258,649 parameters, 0 gradients, 12.0 GFLOPs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/generated_dataset_yolo/val/labels.cache... 
3 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3/3 [00:00=2.0.0 torchvision>=0.16.0 -transformers>=4.37.0 +transformers>=4.45.2 diffusers>=0.24.0 compel>=2.0.0 tqdm>=4.0.0 @@ -12,6 +12,6 @@ accelerate>=0.25.0 scipy>=1.10.0 bitsandbytes>=0.42.0 nltk>=3.8.1 -luxonis-ml[all]>=0.3.0 +luxonis-ml[all]>=0.4.1 python-box>=7.1.1 -gcsfs>=2023.1.0 +gcsfs>=2023.1.0 \ No newline at end of file diff --git a/tests/core_tests/integration/sample_config.yaml b/tests/core_tests/integration/sample_config.yaml index 0feb485..c8fed56 100644 --- a/tests/core_tests/integration/sample_config.yaml +++ b/tests/core_tests/integration/sample_config.yaml @@ -1,7 +1,7 @@ -class_names: [alien, mars, cat] +class_names: [horse, bear, cat] prompts_number: 1 prompt_generator: simple num_objects_range: [2, 3] image_generator: sdxl-turbo conf_threshold: 0.15 -seed: 43 \ No newline at end of file +seed: 42 \ No newline at end of file diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py index a6eba19..a49617c 100644 --- a/tests/core_tests/integration/test_pipeline.py +++ b/tests/core_tests/integration/test_pipeline.py @@ -170,3 +170,43 @@ def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): ) # Check the run of the pipeline _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, 
target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/core_tests/unittests/test_annotators.py b/tests/core_tests/unittests/test_annotators.py index 794b898..4e78df2 100644 --- a/tests/core_tests/unittests/test_annotators.py +++ b/tests/core_tests/unittests/test_annotators.py @@ -9,6 +9,7 @@ from datadreamer.dataset_annotation.clip_annotator import CLIPAnnotator from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator +from datadreamer.dataset_annotation.slimsam_annotator import SlimSAMAnnotator # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1024**3) @@ -65,6 +66,8 @@ def _check_clip_annotator(device: str, size: str = "base"): # Check that the labels are ndarray of integers assert isinstance(labels[0], np.ndarray) and labels[0].dtype == np.int64 + annotator.release(empty_cuda_cache=True if device != "cpu" else False) + @pytest.mark.skipif( not torch.cuda.is_available() or total_disk_space < 16, @@ -87,7 +90,7 @@ def test_cpu_clip_base_annotator(): reason="Test requires GPU and 16GB of HDD", ) def test_cuda_clip_large_annotator(): - _check_clip_annotator("cuda") + _check_clip_annotator("cuda", size="large") @pytest.mark.skipif( @@ -95,4 +98,58 @@ def test_cuda_clip_large_annotator(): reason="Test requires at least 16GB of HDD", 
) def test_cpu_clip_large_annotator(): - _check_clip_annotator("cpu") + _check_clip_annotator("cpu", size="large") + + +def _check_slimsam_annotator(device: str, size: str = "base"): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = SlimSAMAnnotator(device=device, size=size) + masks = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + w, h = im.width, im.height + # Check that the masks are lists + assert isinstance(masks, list) and len(masks) == 1 + # Check that the masks are [B, O, N, 2], where + # - B = batch size + # - O = number of objects + # - N = number of points of the mask segment polygon (at least 3 to be polygon) + assert isinstance(masks[0], list) and len(masks[0]) == 1 + assert isinstance(masks[0][0], list) and len(masks[0][0]) >= 3 + for point in masks[0][0]: + # Check that it is a 2D point + assert len(point) == 2 + assert 0 <= point[0] <= w and 0 <= point[1] <= h + + annotator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_slimsam_base_annotator(): + _check_slimsam_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_slimsam_base_annotator(): + _check_slimsam_annotator("cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_slimsam_large_annotator(): + _check_slimsam_annotator("cuda", size="large") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_slimsam_large_annotator(): + _check_slimsam_annotator("cpu", size="large") diff --git a/tests/core_tests/unittests/test_image_generation.py b/tests/core_tests/unittests/test_image_generation.py index 2436f75..6ff15ef 100644 
--- a/tests/core_tests/unittests/test_image_generation.py +++ b/tests/core_tests/unittests/test_image_generation.py @@ -111,19 +111,3 @@ def test_cuda_sdxl_turbo_image_generator(): ) def test_cpu_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cpu") - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", -) -def test_cuda_sdxl_lightning_image_generator(): - _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") - - -@pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", -) -def test_cpu_sdxl_lightning_image_generator(): - _check_image_generator(StableDiffusionLightningImageGenerator, "cpu") diff --git a/tests/heavy_tests/integration/test_pipeline_heavy.py b/tests/heavy_tests/integration/test_pipeline_heavy.py index ad9ec8a..6b7dc3f 100644 --- a/tests/heavy_tests/integration/test_pipeline_heavy.py +++ b/tests/heavy_tests/integration/test_pipeline_heavy.py @@ -57,7 +57,7 @@ def test_cpu_simple_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -79,7 +79,7 @@ def test_cuda_simple_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -101,7 +101,7 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear 
cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -124,7 +124,7 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -147,7 +147,7 @@ def test_cpu_simple_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -169,7 +169,7 @@ def test_cuda_simple_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -194,7 +194,7 @@ def test_cpu_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -216,7 +216,7 @@ def test_cuda_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -238,7 +238,7 @@ def test_cuda_4bit_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -261,7 
+261,7 @@ def test_cpu_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -283,7 +283,7 @@ def test_cuda_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -305,7 +305,7 @@ def test_cuda_4bit_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -331,7 +331,7 @@ def test_cpu_tiny_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -353,7 +353,7 @@ def test_cuda_tiny_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -375,7 +375,7 @@ def test_cpu_tiny_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -397,7 +397,7 @@ def test_cuda_tiny_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir 
{target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -422,7 +422,7 @@ def test_cpu_qwen2_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -444,7 +444,7 @@ def test_cuda_qwen2_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -466,7 +466,7 @@ def test_cpu_qwen2_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -488,7 +488,7 @@ def test_cuda_qwen2_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -501,7 +501,7 @@ def test_cuda_qwen2_sdxl_detection_pipeline(): # ========================================================= -# CLASSIFICATION - SIMPLE LM +# INSTANCE SEGMENTATION - SIMPLE LM # ========================================================= @pytest.mark.skipif( total_memory < 16 or total_disk_space < 35, @@ -514,7 +514,7 @@ def test_cpu_simple_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " 
f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -538,7 +538,7 @@ def test_cuda_simple_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -562,7 +562,7 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -587,7 +587,7 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -612,7 +612,7 @@ def test_cpu_simple_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--image_annotator clip " @@ -636,7 +636,7 @@ def test_cuda_simple_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--image_annotator clip " @@ -650,7 +650,7 @@ def test_cuda_simple_sdxl_classification_pipeline(): # ========================================================= -# CLASSIFICATION - LLM +# INSTANCE SEGMENTATION - LLM # ========================================================= @pytest.mark.skipif( total_memory < 32 or total_disk_space < 55, @@ -663,7 +663,7 @@ def test_cpu_lm_sdxl_turbo_classification_pipeline(): cmd = 
( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -687,7 +687,7 @@ def test_cuda_lm_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -711,7 +711,7 @@ def test_cuda_4bit_lm_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -736,7 +736,7 @@ def test_cpu_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--image_annotator clip " @@ -760,7 +760,7 @@ def test_cuda_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--image_annotator clip " @@ -784,7 +784,7 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -799,7 +799,7 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): # ========================================================= -# CLASSIFICATION - TinyLlama LLM +# INSTANCE SEGMENTATION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( 
total_memory < 16 or total_disk_space < 35, @@ -812,7 +812,7 @@ def test_cpu_tiny_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--image_annotator clip " @@ -836,7 +836,7 @@ def test_cuda_tiny_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -860,7 +860,7 @@ def test_cpu_tiny_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -884,7 +884,7 @@ def test_cuda_tiny_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -911,7 +911,7 @@ def test_cpu_qwen2_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--image_annotator clip " @@ -935,7 +935,7 @@ def test_cuda_qwen2_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -959,7 +959,7 @@ def test_cpu_qwen2_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - 
f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -983,7 +983,7 @@ def test_cuda_qwen2_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " @@ -994,3 +994,499 @@ def test_cuda_qwen2_sdxl_classification_pipeline(): ) # Check the run of the pipeline _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names 
alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, + reason="Test requires GPU, at least 16GB of RAM and 55GB of HDD", +) +def test_cuda_simple_llm_synonym_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-llm-synonym-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--image_annotator owlv2-slimsam " + f"--use_image_tester " + f"--synonym_generator llm " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_wordnet_synonym_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-wordnet-synonym-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, 
target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-lm-sdxl-turbo/" + # Define the command 
to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, 55GB of HDD and CUDA support", +) +def test_cuda_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 14 or not torch.cuda.is_available() or total_disk_space < 45, + reason="Test requires at least 14GB of RAM, 45GB of HDD and CUDA support", +) +def test_cuda_4bit_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-4bit-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--lm_quantization 4bit " + f"--device cuda" + ) + # Check the run of the 
pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", +) +def test_cuda_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 14 or not torch.cuda.is_available() or total_disk_space < 45, + reason="Test requires at least 14GB of RAM, CUDA support and 45GB of HDD", +) +def test_cuda_4bit_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-4bit-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation 
" + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--lm_quantization 4bit " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - TinyLlama LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_tiny_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-tiny-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_tiny_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-tiny-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # 
Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_tiny_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-tiny-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_tiny_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-tiny-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - Qwen2.5 LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_qwen2_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = 
"data/data-inst-seg-cpu-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_qwen2_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + 
_check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/heavy_tests/unittests/test_image_generation_heavy.py b/tests/heavy_tests/unittests/test_image_generation_heavy.py index 30141cc..49721f7 100644 --- a/tests/heavy_tests/unittests/test_image_generation_heavy.py +++ b/tests/heavy_tests/unittests/test_image_generation_heavy.py @@ -66,3 +66,19 @@ def test_cuda_sdxl_image_generator(): ) def test_cpu_sdxl_image_generator(): _check_image_generator(StableDiffusionImageGenerator, "cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", +) +def test_cuda_sdxl_lightning_image_generator(): + _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", +) +def test_cpu_sdxl_lightning_image_generator(): + _check_image_generator(StableDiffusionLightningImageGenerator, "cpu")