From e8a9e116a6f894671caa394f528c55f91cddf954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Honza=20=C4=8Cuhel?= <79118988+HonzaCuhel@users.noreply.github.com> Date: Fri, 23 Feb 2024 15:15:37 +0100 Subject: [PATCH] Batch annotation (#35) * Add batch annotation * Update docs & add test & fix batched annotation * Change default batch annotation * Fix annotation tests * Fix tests * [Automated] Updated coverage badge * Update annotation example & docstrings * Fix formatting * Fix docstring * [Automated] Updated coverage badge * refactor: replace annotate() with annotate_batch() * feature: replace owlv2 resize --------- Co-authored-by: Jan Cuhel Co-authored-by: GitHub Actions Co-authored-by: Nikita Sokovnin --- README.md | 1 + .../dataset_annotation/image_annotator.py | 4 +- .../dataset_annotation/owlv2_annotator.py | 211 +++++++++++++----- datadreamer/dataset_annotation/utils.py | 4 +- .../generate_dataset_from_scratch.py | 116 +++++----- examples/image_annotation_example.py | 8 +- ..._by_step_dataset_generation_pipeline.ipynb | 122 +++++----- media/coverage_badge.svg | 4 +- tests/integration/test_pipeline.py | 6 + tests/unittests/test_annotators.py | 23 +- 10 files changed, 306 insertions(+), 193 deletions(-) diff --git a/README.md b/README.md index db3450b..2e47f38 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ datadreamer --save_dir --class_names --prompts_number List[dict[str, torch.Tensor]]: + """Generates annotations for the given images and prompts. Args: - image: The image to be annotated. + images: The images to be annotated. + prompts: Prompts to guide the annotation. + conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. + + Returns: + dict: A dictionary containing the annotations for the images. + """ + n = len(images) + batched_prompts = [prompts] * n + target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) + + # resize the images to the model's input size + images = [images[i].resize((960, 960)) for i in range(n)] + inputs = self.processor( + text=batched_prompts, images=images, return_tensors="pt" + ).to(self.device) + with torch.no_grad(): + outputs = self.model(**inputs) + # print(outputs) + preds = self.processor.post_process_object_detection( + outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold + ) + + return preds + + def _get_annotations( + self, + pred: dict[str, torch.Tensor], + use_tta: bool, + img_dim: int, + synonym_dict: dict[str, List[str]] | None, + synonym_dict_rev: dict[int, int] | None, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Extracts the annotations from the predictions. + + Args: + pred: The predictions from the model. + use_tta (bool): Flag to whether the test-time augmentation was applied. + img_dim (int): The dimension of the image. + synonym_dict (dict): Dictionary for handling synonyms in labels. + synonym_dict_rev (dict): Dictionary for handling synonyms in labels. + + Returns: + tuple: A tuple containing the final bounding boxes, scores, and labels for the annotations. 
+ """ + + boxes, scores, labels = ( + pred["boxes"], + pred["scores"], + pred["labels"], + ) + # Flip boxes back if using TTA + if use_tta: + boxes[:, [0, 2]] = img_dim - boxes[:, [2, 0]] + + if synonym_dict is not None: + labels = torch.tensor([synonym_dict_rev[label.item()] for label in labels]) + + return boxes, scores, labels + + def annotate_batch( + self, + images: List[PIL.Image.Image], + prompts: List[str], + conf_threshold: float = 0.1, + use_tta: bool = False, + synonym_dict: dict[str, List[str]] | None = None, + ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: + """Annotates images using the OWLv2 model. + + Args: + images: The images to be annotated. prompts: Prompts to guide the annotation. conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. use_tta (bool, optional): Flag to apply test-time augmentation. Defaults to False. @@ -75,9 +153,7 @@ def annotate( tuple: A tuple containing the final bounding boxes, scores, and labels for the annotations. """ if use_tta: - augmented_images = apply_tta(image) - else: - augmented_images = [image] + augmented_images = [apply_tta(image)[0] for image in images] if synonym_dict is not None: prompts_syn = [] @@ -93,69 +169,88 @@ def annotate( synonym_dict_rev[prompts_syn.index(v)] = prompts.index(key) prompts = prompts_syn - all_boxes = [] - all_scores = [] - all_labels = [] + preds = self._generate_annotations(images, prompts, conf_threshold) + if use_tta: + augmented_preds = self._generate_annotations( + augmented_images, prompts, conf_threshold + ) + else: + augmented_preds = [None] * len(images) - target_sizes = torch.Tensor([augmented_images[0].size[::-1]]).to(self.device) + final_boxes = [] + final_scores = [] + final_labels = [] - for aug_image in augmented_images: - inputs = self.processor( - text=prompts, images=aug_image, return_tensors="pt" - ).to(self.device) - with torch.no_grad(): - outputs = self.model(**inputs) - # print(outputs) - preds = self.processor.post_process_object_detection( - outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold + for i, (pred, aug_pred) in enumerate(zip(preds, augmented_preds)): + boxes, scores, labels = self._get_annotations( + pred, + False, + images[i].size[0], + synonym_dict, + synonym_dict_rev if synonym_dict is not None else None, ) - boxes, scores, labels = ( - preds[0]["boxes"], - preds[0]["scores"], - preds[0]["labels"], - ) - # Flip boxes back if using TTA - if use_tta and len(all_boxes) == 1: - boxes[:, [0, 2]] = image.size[0] - boxes[:, [2, 0]] + all_boxes = [boxes.to("cpu")] + all_scores = [scores.to("cpu")] + all_labels = [labels.to("cpu")] - if synonym_dict is not None: - labels = torch.tensor( - [synonym_dict_rev[label.item()] for label in labels] + # Flip boxes back if using TTA + if use_tta: + aug_boxes, aug_scores, aug_labels = self._get_annotations( + aug_pred, + True, + images[i].size[0], + synonym_dict, + synonym_dict_rev if synonym_dict is not None else None, ) - all_boxes.append(boxes.to("cpu")) - all_scores.append(scores.to("cpu")) - all_labels.append(labels.to("cpu")) + all_boxes.append(aug_boxes.to("cpu")) + all_scores.append(aug_scores.to("cpu")) + all_labels.append(aug_labels.to("cpu")) - # Convert list of tensors to a single tensor for NMS - all_boxes_cat = torch.cat(all_boxes) - all_scores_cat = torch.cat(all_scores) - all_labels_cat = torch.cat(all_labels) + one_hot_labels = torch.nn.functional.one_hot( + torch.cat(all_labels), num_classes=len(prompts) + ) - one_hot_labels = 
torch.nn.functional.one_hot( - all_labels_cat, num_classes=len(prompts) - ) + # Apply NMS + # transform predictions to shape [N, 5 + num_classes], N is the number of bboxes for nms function + all_boxes_cat = torch.cat( + ( + torch.cat(all_boxes), + torch.cat(all_scores).unsqueeze(-1), + one_hot_labels, + ), + dim=1, + ) - # Apply NMS - # transform predictions to shape [N, 5 + num_classes], N is the number of bboxes for nms function - all_boxes_cat = torch.cat( - (all_boxes_cat, all_scores_cat.unsqueeze(-1), one_hot_labels), - dim=1, - ) + # output is a list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls]. + output = non_max_suppression( + all_boxes_cat.unsqueeze(0), conf_thres=conf_threshold, iou_thres=0.2 + ) - # output is a list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls]. - output = non_max_suppression( - all_boxes_cat.unsqueeze(0), conf_thres=conf_threshold, iou_thres=0.2 - ) + output_boxes = output[0][:, :4] + output_scores = output[0][:, 4] + output_local_labels = output[0][:, 5].long() - final_boxes = output[0][:, :4] - final_scores = output[0][:, 4] - final_labels = output[0][:, 5].long() + final_boxes.append( + output_boxes.detach().cpu().numpy() + if not isinstance(output_boxes, np.ndarray) + else output_boxes + ) + final_scores.append( + output_scores.detach().cpu().numpy() + if not isinstance(output_scores, np.ndarray) + else output_scores + ) + final_labels.append( + output_local_labels.detach().cpu().numpy() + if not isinstance(output_local_labels, np.ndarray) + else output_local_labels + ) return final_boxes, final_scores, final_labels - def release(self, empty_cuda_cache=False) -> None: + def release(self, empty_cuda_cache: bool = False) -> None: """Releases the model and optionally empties the CUDA cache. Args: diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index acf9734..2b0ae88 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -8,7 +8,7 @@ def apply_tta(image): image: The image to be augmented. Returns: - list: A list of augmented images, including the original and transformed versions. + list: A list of augmented images. Note: Currently, only horizontal flip is enabled. 
Additional transformations like @@ -16,7 +16,7 @@ def apply_tta(image): """ tta_transforms = [ # Original image - transforms.Compose([]), + # transforms.Compose([]), # Horizontal Flip transforms.Compose([transforms.RandomHorizontalFlip(p=1)]), # Vertical Flip diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index d106f60..db52cfb 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -145,6 +145,13 @@ def parse_args(): help="Batch size for prompt generation", ) + parser.add_argument( + "--batch_size_annotation", + type=int, + default=1, + help="Batch size for annotation", + ) + parser.add_argument( "--batch_size_image", type=int, @@ -228,6 +235,10 @@ def check_args(args): if args.batch_size_prompt < 1: raise ValueError("--batch_size_prompt must be a positive integer") + # Check batch_size_prompt + if args.batch_size_annotation < 1: + raise ValueError("--batch_size_annotation must be a positive integer") + # Check batch_size_image if args.batch_size_image < 1: raise ValueError("--batch_size_image must be a positive integer") @@ -366,69 +377,66 @@ def main(): scores_list = [] labels_list = [] - for i, image_path in tqdm( - enumerate(image_paths), + # Split image_paths into batches + image_batches = [ + image_paths[i : i + args.batch_size_annotation] + for i in range(0, len(image_paths), args.batch_size_annotation) + ] + + for i, image_batch in tqdm( + enumerate(image_batches), desc="Annotating images", - total=len(image_paths), + total=len(image_batches), ): - image = Image.open(image_path) - boxes, scores, local_labels = annotator.annotate( - image, + images = [Image.open(image_path) for image_path in image_batch] + boxes_batch, scores_batch, local_labels_batch = annotator.annotate_batch( + images, args.class_names, conf_threshold=args.conf_threshold, use_tta=args.use_tta, synonym_dict=synonym_dict, ) - # Convert to numpy arrays - boxes = ( - boxes.detach().cpu().numpy() - if not isinstance(boxes, np.ndarray) - else boxes - ) - scores = ( - scores.detach().cpu().numpy() - if not isinstance(scores, np.ndarray) - else scores - ) - local_labels = ( - local_labels - if isinstance(local_labels, np.ndarray) - else local_labels.detach().cpu().numpy() - ) - - boxes_list.append(boxes) - scores_list.append(scores) - labels = [] - # Save bbox visualizations - fig, ax = plt.subplots(1) - ax.imshow(image) - for box, score, label in zip(boxes, scores, local_labels): - labels.append(label) - x1, y1, x2, y2 = box - rect = patches.Rectangle( - (x1, y1), - x2 - x1, - y2 - y1, - linewidth=2, - edgecolor="r", - facecolor="none", - ) - ax.add_patch(rect) - label_text = args.class_names[label] - plt.text( - x1, - y1, - f"{label_text} {score:.2f}", - bbox=dict(facecolor="yellow", alpha=0.5), + boxes_list.extend(boxes_batch) + scores_list.extend(scores_batch) + + for j, image in enumerate(images): + labels = [] + # Save bbox visualizations + fig, ax = plt.subplots(1) + ax.imshow(image) + for box, score, label in zip( + boxes_batch[j], scores_batch[j], local_labels_batch[j] + ): + labels.append(label) + x1, y1, x2, y2 = box + rect = patches.Rectangle( + (x1, y1), + x2 - x1, + y2 - y1, + linewidth=2, + edgecolor="r", + facecolor="none", + ) + ax.add_patch(rect) + label_text = args.class_names[label] + plt.text( + x1, + y1, + f"{label_text} {score:.2f}", + bbox=dict(facecolor="yellow", alpha=0.5), + ) + # Add prompt text as title + plt.title(generated_prompts[i * 
args.batch_size_annotation + j][1]) + + labels_list.append(np.array(labels)) + + plt.savefig( + os.path.join( + bbox_dir, f"bbox_{i * args.batch_size_annotation + j}.jpg" + ) ) - # Add prompt text as title - plt.title(generated_prompts[i][1]) - - labels_list.append(np.array(labels)) - - plt.savefig(os.path.join(bbox_dir, f"bbox_{i}.jpg")) - plt.close() + plt.close() # Save annotations as JSON files save_det_annotations_to_json( diff --git a/examples/image_annotation_example.py b/examples/image_annotation_example.py index c1ce649..411ed20 100644 --- a/examples/image_annotation_example.py +++ b/examples/image_annotation_example.py @@ -8,7 +8,7 @@ # Initialize the OWLv2Annotator annotator = OWLv2Annotator( seed=42, - device="cuda", # Use "cuda" for GPU or "cpu" for CPU + device="cpu", # Use "cuda" for GPU or "cpu" for CPU ) # Load your image @@ -22,10 +22,12 @@ prompts = list(class_map.keys()) # Perform object detection -boxes, scores, labels = annotator.annotate( - image, prompts, conf_threshold=0.15, use_tta=True +boxes, scores, labels = annotator.annotate_batch( + [image], prompts, conf_threshold=0.15, use_tta=True ) +boxes, scores, labels = boxes[0], scores[0], labels[0] + # Convert to numpy arrays if not isinstance(boxes, np.ndarray): boxes = boxes.detach().cpu().numpy() diff --git a/examples/step_by_step_dataset_generation_pipeline.ipynb b/examples/step_by_step_dataset_generation_pipeline.ipynb index de29456..b3466a1 100644 --- a/examples/step_by_step_dataset_generation_pipeline.ipynb +++ b/examples/step_by_step_dataset_generation_pipeline.ipynb @@ -2,23 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-02-20 14:37:33.592243: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-02-20 14:37:33.645672: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-02-20 14:37:33.645721: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-02-20 14:37:33.647238: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-02-20 14:37:33.655817: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-02-20 14:37:34.821585: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" - ] - } - ], + "outputs": [], + "source": [ + "!pip install datadreamer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "import matplotlib.patches as patches\n", "import matplotlib.pyplot as plt\n", @@ -39,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -52,7 +49,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "35bb357df74b426d8811d38f375c4a84", + "model_id": "14dc4004b7d14980b1b2bf3346ef64c6", "version_major": 2, "version_minor": 0 }, @@ -84,16 +81,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generating prompts...: 70%|███████ | 7/10 [00:23<00:09, 3.16s/it]/opt/conda/lib/python3.11/site-packages/transformers/pipelines/base.py:1123: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + "Generating prompts...: 70%|███████ | 7/10 [00:23<00:09, 3.17s/it]/opt/conda/lib/python3.11/site-packages/transformers/pipelines/base.py:1157: UserWarning: You seem to be using the pipelines sequentially on GPU. 
In order to maximize efficiency please use a dataset\n", " warnings.warn(\n", - "Generating prompts...: 100%|██████████| 10/10 [00:35<00:00, 3.53s/it]" + "Generating prompts...: 100%|██████████| 10/10 [00:35<00:00, 3.54s/it]" ] }, { @@ -119,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -148,7 +145,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e25d658ea3ae4fe18d045879a0b92a9b", + "model_id": "91eba34a2c3140bf9e5502a1d09c75d8", "version_major": 2, "version_minor": 0 }, @@ -172,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -182,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -195,7 +192,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e6879668be6f4c1aa00fa45076a79cf6", + "model_id": "9f69944f168940e6a9db2cb4ceab9acf", "version_major": 2, "version_minor": 0 }, @@ -210,13 +207,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 10%|█ | 1/10 [00:11<01:42, 11.40s/it]" + "Generating images: 10%|█ | 1/10 [01:45<15:47, 105.22s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2db33d6d57914c808859e1bbec22ba59", + "model_id": "a2160b68d9aa4df2bc36a669f3e03201", "version_major": 2, "version_minor": 0 }, @@ -231,13 +228,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 20%|██ | 2/10 [00:16<01:00, 7.57s/it]" + "Generating images: 20%|██ | 2/10 [01:50<06:10, 46.36s/it] " ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7d6228e28ce246d18069cef6f949932a", + "model_id": "948e56dc4e164492bbd7225f12afb2c9", "version_major": 2, "version_minor": 0 }, @@ -252,13 +249,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 30%|███ | 3/10 [00:20<00:42, 6.00s/it]" + "Generating images: 30%|███ | 3/10 [01:54<03:10, 27.19s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1cb852722f4c4ca98a446f204fee9a22", + "model_id": "b5b5222207274b10b1ba75469fec282b", "version_major": 2, "version_minor": 0 }, @@ -273,13 +270,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 40%|████ | 4/10 [00:24<00:31, 5.28s/it]" + "Generating images: 40%|████ | 4/10 [01:59<01:48, 18.14s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5dc49c660c8145918d395e5ba3ab481c", + "model_id": "6566828bc2a74385991dfd82efe50dd6", "version_major": 2, "version_minor": 0 }, @@ -294,13 +291,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 50%|█████ | 5/10 [00:28<00:24, 4.88s/it]" + "Generating images: 50%|█████ | 5/10 [02:03<01:05, 13.09s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9007293187954f6eae3c88f81acb7d84", + "model_id": "e047acdff8e54df5886790251dfb33fa", "version_major": 2, "version_minor": 0 }, @@ -315,13 +312,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 60%|██████ | 6/10 [00:32<00:18, 4.59s/it]" + "Generating images: 60%|██████ | 6/10 [02:07<00:40, 10.05s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1488a0b44c90479e8efa63981340ad9c", + 
"model_id": "a1b80c1feaaf4ec4952cc7cdb2464fd5", "version_major": 2, "version_minor": 0 }, @@ -336,13 +333,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 70%|███████ | 7/10 [00:36<00:13, 4.41s/it]" + "Generating images: 70%|███████ | 7/10 [02:11<00:24, 8.13s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2382da1c5de848dd8c4db0200448cdf3", + "model_id": "c09c5adc383e4e82b968caf1a6a9cc37", "version_major": 2, "version_minor": 0 }, @@ -357,13 +354,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 80%|████████ | 8/10 [00:40<00:08, 4.31s/it]" + "Generating images: 80%|████████ | 8/10 [02:15<00:13, 6.85s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1d466fe6bb9d4e3fbe1bafd339bfbfdc", + "model_id": "1febee5bc5db4bee90c4aff3924942a1", "version_major": 2, "version_minor": 0 }, @@ -378,13 +375,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 90%|█████████ | 9/10 [00:44<00:04, 4.21s/it]" + "Generating images: 90%|█████████ | 9/10 [02:19<00:06, 6.00s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a963d42464434619bb883fa16de2e899", + "model_id": "79404d67bd804f0a927eb9c19098919c", "version_major": 2, "version_minor": 0 }, @@ -399,7 +396,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 100%|██████████| 10/10 [00:48<00:00, 4.89s/it]\n" + "Generating images: 100%|██████████| 10/10 [02:24<00:00, 14.40s/it]\n" ] } ], @@ -412,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -420,7 +417,7 @@ "output_type": "stream", "text": [ "(['aeroplane'], 'A photo of an aeroplane soaring over a cityscape at sunset, casting a golden glow over the rooftops and creating a stunning backdrop for the bustling metropolis below.')\n", - "\n" + "\n" ] }, { @@ -438,7 +435,7 @@ "output_type": "stream", "text": [ "(['car', 'aeroplane'], 'A photo of a car and a helicopter in the city skyline, showing the coexistence of man-made machines in urban life.')\n", - "\n" + "\n" ] }, { @@ -456,7 +453,7 @@ "output_type": "stream", "text": [ "(['unicorn'], 'A photo of majestic unicorns frolicking in a lush green meadow, surrounded by vibrant wildflowers and the clear blue sky.')\n", - "\n" + "\n" ] }, { @@ -474,7 +471,7 @@ "output_type": "stream", "text": [ "(['car'], 'A photo of car: A rusty old car sits abandoned in a fields, surrounded by towering wheat stalks. The cars doors are thrown open, revealing a worn-out interior. The scene is bathed in the golden light of the setting sun, casting shadows across the field.')\n", - "\n" + "\n" ] }, { @@ -492,7 +489,7 @@ "output_type": "stream", "text": [ "(['aeroplane'], 'A photo of an aeroplane soaring over a beautiful sunset and a bustling city.')\n", - "\n" + "\n" ] }, { @@ -510,7 +507,7 @@ "output_type": "stream", "text": [ "(['unicorn'], 'A photo of unicorns grazing in a serene meadow, their ethereal beauty and pureness illuminating the scene.')\n", - "\n" + "\n" ] }, { @@ -528,7 +525,7 @@ "output_type": "stream", "text": [ "(['aeroplane', 'unicorn'], 'A photo of an aeroplane and a unicorn soaring above the clouds in the sunset - A serene image of two majestic creatures, gracefully flying together in harmony.')\n", - "\n" + "\n" ] }, { @@ -546,7 +543,7 @@ "output_type": "stream", "text": [ "(['car'], 'A photo of a car. 
A sleek, red sports car speeds down a winding mountain road, surrounded by the breathtaking views of a scenic landscape.')\n", - "\n" + "\n" ] }, { @@ -564,7 +561,7 @@ "output_type": "stream", "text": [ "(['aeroplane', 'car'], 'A photo of aeroplane flying above a busy city, soaring high above the buildings and the cars below.')\n", - "\n" + "\n" ] }, { @@ -582,7 +579,7 @@ "output_type": "stream", "text": [ "(['person', 'aeroplane'], 'A photo of a person waving goodbye to an aeroplane, as they bid farewell to a loved one who has left for a foreign land. The image captures the bittersweet moment of separation and the longing for a loved one to return, set against the backdrop of an aeroplane.')\n", - "\n" + "\n" ] }, { @@ -608,7 +605,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -617,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -638,7 +635,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -837,7 +834,8 @@ " prompts = prompt_objs\n", "\n", " # Perform object detection\n", - " boxes, scores, labels = annotator.annotate(image, prompts, conf_threshold=0.2, use_tta=True)\n", + " boxes_batch, scores_batch, labels_batch = annotator.annotate_batch([image], prompts, conf_threshold=0.2, use_tta=True)\n", + " boxes, scores, labels = boxes_batch[0], scores_batch[0], labels_batch[0]\n", "\n", " # Convert to numpy arrays\n", " if not isinstance(boxes, np.ndarray):\n", @@ -905,7 +903,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index d7667be..1581a9a 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 47% - 47% + 48% + 48% diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 06a4e6b..2a5cdc8 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -168,6 +168,12 @@ def test_invalid_batch_size_prompt(): _check_wrong_value(cmd) +def test_invalid_batch_size_annotation(): + # Define the cmd + cmd = "datadreamer --batch_size_annotation -1" + _check_wrong_value(cmd) + + def test_invalid_batch_size_image(): # Define the cmd cmd = "datadreamer --batch_size_image -1" diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py index 0926d85..638e35b 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/unittests/test_annotators.py @@ -1,3 +1,4 @@ +import numpy as np import psutil import pytest import requests @@ -14,23 +15,25 @@ def _check_owlv2_annotator(device: str): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) annotator = OWLv2Annotator(device=device) - final_boxes, final_scores, final_labels = annotator.annotate(im, ["bus", "people"]) + final_boxes, final_scores, final_labels = annotator.annotate_batch( + [im], ["bus", "people"] + ) # Assert that the boxes, scores and labels are tensors - assert type(final_boxes) == torch.Tensor - assert type(final_scores) == torch.Tensor - assert type(final_labels) == torch.Tensor + assert isinstance(final_boxes, list) and len(final_boxes) == 1 + assert isinstance(final_scores, list) and len(final_scores) == 1 + assert isinstance(final_labels, list) and len(final_labels) == 1 # Get 
the number of objects detected
-    num_objects = final_boxes.shape[0]
+    num_objects = final_boxes[0].shape[0]
     # Check that the boxes has correct shape
-    assert final_boxes.shape == (num_objects, 4)
+    assert final_boxes[0].shape == (num_objects, 4)
     # Check that the scores has correct shape
-    assert final_scores.shape == (num_objects,)
+    assert final_scores[0].shape == (num_objects,)
     # Check that the labels has correct shape
-    assert final_labels.shape == (num_objects,)
+    assert final_labels[0].shape == (num_objects,)
     # Check that the scores are not zero
-    assert torch.all(final_scores > 0)
+    assert np.all(final_scores[0] > 0)
     # Check that the labels are bigger or equal to zero
-    assert torch.all(final_labels >= 0)
+    assert np.all(final_labels[0] >= 0)

 @pytest.mark.skipif(
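
A minimal usage sketch of the batched annotation API introduced by this patch, assembled from the calls shown in examples/image_annotation_example.py and tests/unittests/test_annotators.py above. The import path and the image file names are assumptions for illustration; the constructor, annotate_batch(), and release() calls follow the signatures added in owlv2_annotator.py.

from PIL import Image

# Import path assumed from the module layout in this patch.
from datadreamer.dataset_annotation import OWLv2Annotator

# Mirrors examples/image_annotation_example.py: use "cuda" for GPU or "cpu" for CPU.
annotator = OWLv2Annotator(seed=42, device="cpu")

# Placeholder file names; any RGB images work.
image_paths = ["image_0.jpg", "image_1.jpg"]
images = [Image.open(p) for p in image_paths]
prompts = ["bus", "people"]

# annotate_batch() replaces the old per-image annotate(): it takes a list of
# images and returns three lists with one numpy array per input image.
boxes_batch, scores_batch, labels_batch = annotator.annotate_batch(
    images, prompts, conf_threshold=0.15, use_tta=False
)

for path, boxes, scores, labels in zip(image_paths, boxes_batch, scores_batch, labels_batch):
    # boxes: (N, 4) xyxy pixel coordinates; scores: (N,); labels: (N,) indices into `prompts`.
    for box, score, label in zip(boxes, scores, labels):
        print(path, prompts[int(label)], round(float(score), 2), box.tolist())

annotator.release(empty_cuda_cache=False)

On the command line, the same batching is controlled by the new --batch_size_annotation option added to generate_dataset_from_scratch.py; it defaults to 1 and must be a positive integer.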