From 786187aa15dbb43fa7215f13d6d70dd5e4fc6275 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Tue, 20 Feb 2024 10:09:35 +0400 Subject: [PATCH 01/12] Add batch annotation --- .../dataset_annotation/owlv2_annotator.py | 152 +++++++++++------- datadreamer/dataset_annotation/utils.py | 4 +- .../generate_dataset_from_scratch.py | 121 ++++++++------ 3 files changed, 164 insertions(+), 113 deletions(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index f6d571e..3b9b2cf 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -4,6 +4,7 @@ from datadreamer.dataset_annotation.image_annotator import BaseAnnotator from datadreamer.dataset_annotation.utils import apply_tta from datadreamer.utils.nms import non_max_suppression +import numpy as np class OWLv2Annotator(BaseAnnotator): @@ -18,7 +19,7 @@ class OWLv2Annotator(BaseAnnotator): Methods: _init_model(): Initializes the OWLv2 model. _init_processor(): Initializes the processor for the OWLv2 model. - annotate(image, prompts, conf_threshold, use_tta, synonym_dict): Annotates the given image with bounding boxes and labels. + annotate_batch(image, prompts, conf_threshold, use_tta, synonym_dict): Annotates the given image with bounding boxes and labels. release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. """ @@ -58,14 +59,49 @@ def _init_processor(self): return Owlv2Processor.from_pretrained( "google/owlv2-base-patch16-ensemble", do_pad=False ) + + def _generate_annotations(self, images, prompts, conf_threshold=0.1): + """""" + n = len(images) + batched_prompts = [prompts] * n + target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) + + inputs = self.processor( + text=batched_prompts, images=images, return_tensors="pt" + ).to(self.device) + with torch.no_grad(): + outputs = self.model(**inputs) + # print(outputs) + preds = self.processor.post_process_object_detection( + outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold + ) + + return preds + + def _get_annotations(self, pred, use_tta: bool, img_dim: int, synonym_dict, synonym_dict_rev): + boxes, scores, labels = ( + pred["boxes"], + pred["scores"], + pred["labels"], + ) + # Flip boxes back if using TTA + if use_tta: + boxes[:, [0, 2]] = img_dim - boxes[:, [2, 0]] + + if synonym_dict is not None: + labels = torch.tensor( + [synonym_dict_rev[label.item()] for label in labels] + ) + + return boxes, scores, labels - def annotate( - self, image, prompts, conf_threshold=0.1, use_tta=False, synonym_dict=None + def annotate_batch( + self, images, prompts, conf_threshold=0.1, use_tta=False, synonym_dict=None ): - """Annotates an image using the OWLv2 model. + """Annotates images using the OWLv2 model. Args: - image: The image to be annotated. + images: The images to be annotated. prompts: Prompts to guide the annotation. conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. use_tta (bool, optional): Flag to apply test-time augmentation. Defaults to False. @@ -75,9 +111,7 @@ def annotate( tuple: A tuple containing the final bounding boxes, scores, and labels for the annotations. 
""" if use_tta: - augmented_images = apply_tta(image) - else: - augmented_images = [image] + augmented_images = [apply_tta(image)[0] for image in images] if synonym_dict is not None: prompts_syn = [] @@ -93,65 +127,67 @@ def annotate( synonym_dict_rev[prompts_syn.index(v)] = prompts.index(key) prompts = prompts_syn - all_boxes = [] - all_scores = [] - all_labels = [] - - target_sizes = torch.Tensor([augmented_images[0].size[::-1]]).to(self.device) - - for aug_image in augmented_images: - inputs = self.processor( - text=prompts, images=aug_image, return_tensors="pt" - ).to(self.device) - with torch.no_grad(): - outputs = self.model(**inputs) - # print(outputs) - preds = self.processor.post_process_object_detection( - outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold - ) - - boxes, scores, labels = ( - preds[0]["boxes"], - preds[0]["scores"], - preds[0]["labels"], + preds = self._generate_annotations(self, images, prompts, conf_threshold) + if use_tta: + augmented_preds = self._generate_annotations(self, augmented_images, prompts, conf_threshold) + else: + augmented_preds = [None] * len(images) + + final_boxes = [] + final_scores = [] + final_labels = [] + + for i, (pred, aug_pred) in enumerate(zip(preds, augmented_preds)): + boxes, scores, labels = self._get_annotations( + pred, + False, + images[i].size[0], + synonym_dict, + synonym_dict_rev ) + + all_boxes = [boxes.to("cpu")] + all_scores = [scores.to("cpu")] + all_labels = [labels.to("cpu")] + # Flip boxes back if using TTA - if use_tta and len(all_boxes) == 1: - boxes[:, [0, 2]] = image.size[0] - boxes[:, [2, 0]] - - if synonym_dict is not None: - labels = torch.tensor( - [synonym_dict_rev[label.item()] for label in labels] + if use_tta: + aug_boxes, aug_scores, aug_labels = self._get_annotations( + aug_pred, + True, + images[i].size[0], + synonym_dict, + synonym_dict_rev ) - all_boxes.append(boxes.to("cpu")) - all_scores.append(scores.to("cpu")) - all_labels.append(labels.to("cpu")) + all_boxes.append(aug_boxes.to("cpu")) + all_scores.append(aug_scores.to("cpu")) + all_labels.append(aug_labels.to("cpu")) - # Convert list of tensors to a single tensor for NMS - all_boxes_cat = torch.cat(all_boxes) - all_scores_cat = torch.cat(all_scores) - all_labels_cat = torch.cat(all_labels) + # Convert list of tensors to a single tensor for NMS + all_boxes_cat = torch.cat(all_boxes) + all_scores_cat = torch.cat(all_scores) + all_labels_cat = torch.cat(all_labels) - one_hot_labels = torch.nn.functional.one_hot( - all_labels_cat, num_classes=len(prompts) - ) + one_hot_labels = torch.nn.functional.one_hot( + all_labels_cat, num_classes=len(prompts) + ) - # Apply NMS - # transform predictions to shape [N, 5 + num_classes], N is the number of bboxes for nms function - all_boxes_cat = torch.cat( - (all_boxes_cat, all_scores_cat.unsqueeze(-1), one_hot_labels), - dim=1, - ) + # Apply NMS + # transform predictions to shape [N, 5 + num_classes], N is the number of bboxes for nms function + all_boxes_cat = torch.cat( + (all_boxes_cat, all_scores_cat.unsqueeze(-1), one_hot_labels), + dim=1, + ) - # output is a list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls]. - output = non_max_suppression( - all_boxes_cat.unsqueeze(0), conf_thres=conf_threshold, iou_thres=0.2 - ) + # output is a list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls]. 
+ output = non_max_suppression( + all_boxes_cat.unsqueeze(0), conf_thres=conf_threshold, iou_thres=0.2 + ) - final_boxes = output[0][:, :4] - final_scores = output[0][:, 4] - final_labels = output[0][:, 5].long() + final_boxes.append(output[0][:, :4]) + final_scores.append(output[0][:, 4]) + final_labels.append(output[0][:, 5].long()) return final_boxes, final_scores, final_labels diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index acf9734..2b0ae88 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -8,7 +8,7 @@ def apply_tta(image): image: The image to be augmented. Returns: - list: A list of augmented images, including the original and transformed versions. + list: A list of augmented images. Note: Currently, only horizontal flip is enabled. Additional transformations like @@ -16,7 +16,7 @@ def apply_tta(image): """ tta_transforms = [ # Original image - transforms.Compose([]), + # transforms.Compose([]), # Horizontal Flip transforms.Compose([transforms.RandomHorizontalFlip(p=1)]), # Vertical Flip diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 2a1f5e3..1e75222 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -145,6 +145,13 @@ def parse_args(): help="Batch size for prompt generation", ) + parser.add_argument( + "--batch_size_annotation", + type=int, + default=64, + help="Batch size for annotation", + ) + parser.add_argument( "--device", type=str, @@ -221,6 +228,10 @@ def check_args(args): if args.batch_size_prompt < 1: raise ValueError("--batch_size_prompt must be a positive integer") + # Check batch_size_prompt + if args.batch_size_annotation < 1: + raise ValueError("--batch_size_annotation must be a positive integer") + # Check seed if args.seed < 0: raise ValueError("--seed must be a non-negative integer") @@ -351,69 +362,73 @@ def main(): scores_list = [] labels_list = [] - for i, image_path in tqdm( - enumerate(image_paths), - desc="Annotating images", - total=len(image_paths), - ): - image = Image.open(image_path) - boxes, scores, local_labels = annotator.annotate( - image, + # Split image_paths into batches + image_batches = [image_paths[i:i + args.batch_size_annotation] for i in range(0, len(image_paths), args.batch_size_annotation)] + + for i, image_batch in tqdm( + enumerate(image_batches), + desc="Annotating images", + total=len(image_batches), + ): + images = [Image.open(image_path) for image_path in image_batch] + boxes_batch, scores_batch, local_labels_batch = annotator.annotate_batch( + images, args.class_names, conf_threshold=args.conf_threshold, use_tta=args.use_tta, synonym_dict=synonym_dict, ) # Convert to numpy arrays - boxes = ( - boxes.detach().cpu().numpy() - if not isinstance(boxes, np.ndarray) - else boxes + boxes_batch = ( + boxes_batch.detach().cpu().numpy() + if not isinstance(boxes_batch, np.ndarray) + else boxes_batch ) - scores = ( - scores.detach().cpu().numpy() - if not isinstance(scores, np.ndarray) - else scores + scores_batch = ( + scores_batch.detach().cpu().numpy() + if not isinstance(scores_batch, np.ndarray) + else scores_batch ) - local_labels = ( - local_labels - if isinstance(local_labels, np.ndarray) - else local_labels.detach().cpu().numpy() + local_labels_batch = ( + local_labels_batch + if isinstance(local_labels_batch, np.ndarray) + else local_labels_batch.detach().cpu().numpy() ) 
- boxes_list.append(boxes) - scores_list.append(scores) - - labels = [] - # Save bbox visualizations - fig, ax = plt.subplots(1) - ax.imshow(image) - for box, score, label in zip(boxes, scores, local_labels): - labels.append(label) - x1, y1, x2, y2 = box - rect = patches.Rectangle( - (x1, y1), - x2 - x1, - y2 - y1, - linewidth=2, - edgecolor="r", - facecolor="none", - ) - ax.add_patch(rect) - label_text = args.class_names[label] - plt.text( - x1, - y1, - f"{label_text} {score:.2f}", - bbox=dict(facecolor="yellow", alpha=0.5), - ) - # Add prompt text as title - plt.title(generated_prompts[i][1]) - - labels_list.append(np.array(labels)) - - plt.savefig(os.path.join(bbox_dir, f"bbox_{i}.jpg")) - plt.close() + boxes_list.extend(boxes_batch) + scores_list.extend(scores_batch) + + for j, image in enumerate(images): + labels = [] + # Save bbox visualizations + fig, ax = plt.subplots(1) + ax.imshow(image) + for box, score, label in zip(boxes_batch[j], scores_batch[j], local_labels_batch[j]): + labels.append(label) + x1, y1, x2, y2 = box + rect = patches.Rectangle( + (x1, y1), + x2 - x1, + y2 - y1, + linewidth=2, + edgecolor="r", + facecolor="none", + ) + ax.add_patch(rect) + label_text = args.class_names[label] + plt.text( + x1, + y1, + f"{label_text} {score:.2f}", + bbox=dict(facecolor="yellow", alpha=0.5), + ) + # Add prompt text as title + plt.title(generated_prompts[i * args.batch_size + j][1]) + + labels_list.append(np.array(labels)) + + plt.savefig(os.path.join(bbox_dir, f"bbox_{i * args.batch_size + j}.jpg")) + plt.close() # Save annotations as JSON files save_det_annotations_to_json( From 34c949de5bd1d602873e60ad1fc211250b5fcbf5 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Wed, 21 Feb 2024 14:24:27 +0400 Subject: [PATCH 02/12] Update docs & add test & fix batched annotation --- README.md | 1 + .../dataset_annotation/image_annotator.py | 2 +- .../dataset_annotation/owlv2_annotator.py | 79 +++++++++++-------- .../generate_dataset_from_scratch.py | 43 +++++----- tests/integration/test_pipeline.py | 6 ++ 5 files changed, 74 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 7ff73df..fe7a0b4 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ datadreamer --save_dir --class_names --prompts_number Date: Wed, 21 Feb 2024 17:33:25 +0400 Subject: [PATCH 03/12] Change default batch annotation --- datadreamer/pipelines/generate_dataset_from_scratch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index b2f88de..6ad17d4 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -148,7 +148,7 @@ def parse_args(): parser.add_argument( "--batch_size_annotation", type=int, - default=8, + default=1, help="Batch size for annotation", ) From 479694834d93657964ffde6e595d8f9cbf37c577 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Wed, 21 Feb 2024 21:50:26 +0400 Subject: [PATCH 04/12] Fix annotation tests --- tests/unittests/test_annotators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py index 0926d85..70fc713 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/unittests/test_annotators.py @@ -14,7 +14,7 @@ def _check_owlv2_annotator(device: str): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) annotator = 
OWLv2Annotator(device=device) - final_boxes, final_scores, final_labels = annotator.annotate(im, ["bus", "people"]) + final_boxes, final_scores, final_labels = annotator.annotate_batch(im, ["bus", "people"]) # Assert that the boxes, scores and labels are tensors assert type(final_boxes) == torch.Tensor assert type(final_scores) == torch.Tensor From 9146b4c25474d49b2842e0fc22394f4b3aef3f2c Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Wed, 21 Feb 2024 23:22:18 +0400 Subject: [PATCH 05/12] Fix tests --- tests/unittests/test_annotators.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py index 70fc713..638e35b 100644 --- a/tests/unittests/test_annotators.py +++ b/tests/unittests/test_annotators.py @@ -1,3 +1,4 @@ +import numpy as np import psutil import pytest import requests @@ -14,23 +15,25 @@ def _check_owlv2_annotator(device: str): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) annotator = OWLv2Annotator(device=device) - final_boxes, final_scores, final_labels = annotator.annotate_batch(im, ["bus", "people"]) + final_boxes, final_scores, final_labels = annotator.annotate_batch( + [im], ["bus", "people"] + ) # Assert that the boxes, scores and labels are tensors - assert type(final_boxes) == torch.Tensor - assert type(final_scores) == torch.Tensor - assert type(final_labels) == torch.Tensor + assert isinstance(final_boxes, list) and len(final_boxes) == 1 + assert isinstance(final_scores, list) and len(final_scores) == 1 + assert isinstance(final_labels, list) and len(final_labels) == 1 # Get the number of objects detected - num_objects = final_boxes.shape[0] + num_objects = final_boxes[0].shape[0] # Check that the boxes has correct shape - assert final_boxes.shape == (num_objects, 4) + assert final_boxes[0].shape == (num_objects, 4) # Check that the scores has correct shape - assert final_scores.shape == (num_objects,) + assert final_scores[0].shape == (num_objects,) # Check that the labels has correct shape - assert final_labels.shape == (num_objects,) + assert final_labels[0].shape == (num_objects,) # Check that the scores are not zero - assert torch.all(final_scores > 0) + assert np.all(final_scores[0] > 0) # Check that the labels are bigger or equal to zero - assert torch.all(final_labels >= 0) + assert np.all(final_labels[0] >= 0) @pytest.mark.skipif( From 59b744a12eb2ea21660a560bc0f1a8fca44776cb Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 21 Feb 2024 19:34:01 +0000 Subject: [PATCH 06/12] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 9d027c7..4f8c185 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 49% - 49% + 50% + 50% From 3cd71d30656fbbef161b7b300b3c385a18bfb35b Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Thu, 22 Feb 2024 23:44:39 +0400 Subject: [PATCH 07/12] Update annotation example & docstrings --- .../dataset_annotation/owlv2_annotator.py | 56 ++++++++++++++++--- examples/image_annotation_example.py | 8 ++- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index b5ef220..7f1747b 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ 
b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -1,4 +1,7 @@ +from typing import List, Tuple + import numpy as np +import PIL import torch from transformers import Owlv2ForObjectDetection, Owlv2Processor @@ -60,8 +63,23 @@ def _init_processor(self): "google/owlv2-base-patch16-ensemble", do_pad=False ) - def _generate_annotations(self, images, prompts, conf_threshold=0.1): - """""" + def _generate_annotations( + self, + images: List[PIL.Image.Image], + prompts: List[str], + conf_threshold: float = 0.1, + ) -> List[dict[str, torch.Tensor]]: + """ + Generates annotations for the given images and prompts. + + Args: + images: The images to be annotated. + prompts: Prompts to guide the annotation. + conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. + + Returns: + dict: A dictionary containing the annotations for the images. + """ n = len(images) batched_prompts = [prompts] * n target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) @@ -79,8 +97,27 @@ def _generate_annotations(self, images, prompts, conf_threshold=0.1): return preds def _get_annotations( - self, pred, use_tta: bool, img_dim: int, synonym_dict, synonym_dict_rev - ): + self, + pred: dict[str, torch.Tensor], + use_tta: bool, + img_dim: int, + synonym_dict: dict[str, List[str]] | None, + synonym_dict_rev: dict[int, int] | None, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Extracts the annotations from the predictions. + + Args: + pred: The predictions from the model. + use_tta (bool): Flag to whether the test-time augmentation was applied. + img_dim (int): The dimension of the image. + synonym_dict (dict): Dictionary for handling synonyms in labels. + synonym_dict_rev (dict): Dictionary for handling synonyms in labels. + + Returns: + tuple: A tuple containing the final bounding boxes, scores, and labels for the annotations. + """ + boxes, scores, labels = ( pred["boxes"], pred["scores"], @@ -96,8 +133,13 @@ def _get_annotations( return boxes, scores, labels def annotate_batch( - self, images, prompts, conf_threshold=0.1, use_tta=False, synonym_dict=None - ): + self, + images: List[PIL.Image.Image], + prompts: List[str], + conf_threshold: float = 0.1, + use_tta: bool = False, + synonym_dict: dict[str, List[str]] | None = None, + ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: """Annotates images using the OWLv2 model. Args: @@ -208,7 +250,7 @@ def annotate_batch( return final_boxes, final_scores, final_labels - def release(self, empty_cuda_cache=False) -> None: + def release(self, empty_cuda_cache: bool = False) -> None: """Releases the model and optionally empties the CUDA cache. 
Args: diff --git a/examples/image_annotation_example.py b/examples/image_annotation_example.py index c1ce649..411ed20 100644 --- a/examples/image_annotation_example.py +++ b/examples/image_annotation_example.py @@ -8,7 +8,7 @@ # Initialize the OWLv2Annotator annotator = OWLv2Annotator( seed=42, - device="cuda", # Use "cuda" for GPU or "cpu" for CPU + device="cpu", # Use "cuda" for GPU or "cpu" for CPU ) # Load your image @@ -22,10 +22,12 @@ prompts = list(class_map.keys()) # Perform object detection -boxes, scores, labels = annotator.annotate( - image, prompts, conf_threshold=0.15, use_tta=True +boxes, scores, labels = annotator.annotate_batch( + [image], prompts, conf_threshold=0.15, use_tta=True ) +boxes, scores, labels = boxes[0], scores[0], labels[0] + # Convert to numpy arrays if not isinstance(boxes, np.ndarray): boxes = boxes.detach().cpu().numpy() From bf372578c81b0a7bddc9c0c3fd4c590373c95c92 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Thu, 22 Feb 2024 23:53:14 +0400 Subject: [PATCH 08/12] Fix formatting --- datadreamer/pipelines/generate_dataset_from_scratch.py | 2 +- tests/integration/test_pipeline.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index ade7c3f..db52cfb 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -151,7 +151,7 @@ def parse_args(): default=1, help="Batch size for annotation", ) - + parser.add_argument( "--batch_size_image", type=int, diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index f946bea..2a5cdc8 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -173,7 +173,7 @@ def test_invalid_batch_size_annotation(): cmd = "datadreamer --batch_size_annotation -1" _check_wrong_value(cmd) - + def test_invalid_batch_size_image(): # Define the cmd cmd = "datadreamer --batch_size_image -1" From 678f2564d7a8846022c7e28d8b45e25be27b7679 Mon Sep 17 00:00:00 2001 From: Jan Cuhel Date: Thu, 22 Feb 2024 23:57:52 +0400 Subject: [PATCH 09/12] Fix docstring --- datadreamer/dataset_annotation/owlv2_annotator.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 7f1747b..ff5d7db 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -69,8 +69,7 @@ def _generate_annotations( prompts: List[str], conf_threshold: float = 0.1, ) -> List[dict[str, torch.Tensor]]: - """ - Generates annotations for the given images and prompts. + """Generates annotations for the given images and prompts. Args: images: The images to be annotated. @@ -104,8 +103,7 @@ def _get_annotations( synonym_dict: dict[str, List[str]] | None, synonym_dict_rev: dict[int, int] | None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Extracts the annotations from the predictions. + """Extracts the annotations from the predictions. Args: pred: The predictions from the model. 
From 753122723d908dc5c5952f8983c1d3d2d6cbee7a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 22 Feb 2024 20:08:54 +0000 Subject: [PATCH 10/12] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 4f8c185..1581a9a 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 50% - 50% + 48% + 48% From bf990a76e684cc1970fab465277ae298eb49a308 Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Thu, 22 Feb 2024 22:03:30 +0000 Subject: [PATCH 11/12] refactor: replace annotate() with annotate_batch() --- .../dataset_annotation/image_annotator.py | 2 +- ..._by_step_dataset_generation_pipeline.ipynb | 122 +++++++++--------- 2 files changed, 61 insertions(+), 63 deletions(-) diff --git a/datadreamer/dataset_annotation/image_annotator.py b/datadreamer/dataset_annotation/image_annotator.py index 9de1fa2..bf50dfe 100644 --- a/datadreamer/dataset_annotation/image_annotator.py +++ b/datadreamer/dataset_annotation/image_annotator.py @@ -20,7 +20,7 @@ class BaseAnnotator(ABC): which can be overridden by subclasses for specific tasks. Methods: - annotate(): Abstract method to be implemented by subclasses. It should contain + annotate_batch(): Abstract method to be implemented by subclasses. It should contain the logic for performing annotation based on the task definition. """ diff --git a/examples/step_by_step_dataset_generation_pipeline.ipynb b/examples/step_by_step_dataset_generation_pipeline.ipynb index de29456..b3466a1 100644 --- a/examples/step_by_step_dataset_generation_pipeline.ipynb +++ b/examples/step_by_step_dataset_generation_pipeline.ipynb @@ -2,23 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-02-20 14:37:33.592243: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-02-20 14:37:33.645672: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-02-20 14:37:33.645721: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-02-20 14:37:33.647238: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-02-20 14:37:33.655817: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-02-20 14:37:34.821585: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" - ] - } - ], + "outputs": [], + "source": [ + "!pip install datadreamer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "import matplotlib.patches as patches\n", "import matplotlib.pyplot as plt\n", @@ -39,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -52,7 +49,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "35bb357df74b426d8811d38f375c4a84", + "model_id": "14dc4004b7d14980b1b2bf3346ef64c6", "version_major": 2, "version_minor": 0 }, @@ -84,16 +81,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Generating prompts...: 70%|███████ | 7/10 [00:23<00:09, 3.16s/it]/opt/conda/lib/python3.11/site-packages/transformers/pipelines/base.py:1123: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", + "Generating prompts...: 70%|███████ | 7/10 [00:23<00:09, 3.17s/it]/opt/conda/lib/python3.11/site-packages/transformers/pipelines/base.py:1157: UserWarning: You seem to be using the pipelines sequentially on GPU. 
In order to maximize efficiency please use a dataset\n", " warnings.warn(\n", - "Generating prompts...: 100%|██████████| 10/10 [00:35<00:00, 3.53s/it]" + "Generating prompts...: 100%|██████████| 10/10 [00:35<00:00, 3.54s/it]" ] }, { @@ -119,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -135,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -148,7 +145,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e25d658ea3ae4fe18d045879a0b92a9b", + "model_id": "91eba34a2c3140bf9e5502a1d09c75d8", "version_major": 2, "version_minor": 0 }, @@ -172,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -182,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -195,7 +192,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e6879668be6f4c1aa00fa45076a79cf6", + "model_id": "9f69944f168940e6a9db2cb4ceab9acf", "version_major": 2, "version_minor": 0 }, @@ -210,13 +207,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 10%|█ | 1/10 [00:11<01:42, 11.40s/it]" + "Generating images: 10%|█ | 1/10 [01:45<15:47, 105.22s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2db33d6d57914c808859e1bbec22ba59", + "model_id": "a2160b68d9aa4df2bc36a669f3e03201", "version_major": 2, "version_minor": 0 }, @@ -231,13 +228,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 20%|██ | 2/10 [00:16<01:00, 7.57s/it]" + "Generating images: 20%|██ | 2/10 [01:50<06:10, 46.36s/it] " ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7d6228e28ce246d18069cef6f949932a", + "model_id": "948e56dc4e164492bbd7225f12afb2c9", "version_major": 2, "version_minor": 0 }, @@ -252,13 +249,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 30%|███ | 3/10 [00:20<00:42, 6.00s/it]" + "Generating images: 30%|███ | 3/10 [01:54<03:10, 27.19s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1cb852722f4c4ca98a446f204fee9a22", + "model_id": "b5b5222207274b10b1ba75469fec282b", "version_major": 2, "version_minor": 0 }, @@ -273,13 +270,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 40%|████ | 4/10 [00:24<00:31, 5.28s/it]" + "Generating images: 40%|████ | 4/10 [01:59<01:48, 18.14s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5dc49c660c8145918d395e5ba3ab481c", + "model_id": "6566828bc2a74385991dfd82efe50dd6", "version_major": 2, "version_minor": 0 }, @@ -294,13 +291,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 50%|█████ | 5/10 [00:28<00:24, 4.88s/it]" + "Generating images: 50%|█████ | 5/10 [02:03<01:05, 13.09s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9007293187954f6eae3c88f81acb7d84", + "model_id": "e047acdff8e54df5886790251dfb33fa", "version_major": 2, "version_minor": 0 }, @@ -315,13 +312,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 60%|██████ | 6/10 [00:32<00:18, 4.59s/it]" + "Generating images: 60%|██████ | 6/10 [02:07<00:40, 10.05s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1488a0b44c90479e8efa63981340ad9c", + 
"model_id": "a1b80c1feaaf4ec4952cc7cdb2464fd5", "version_major": 2, "version_minor": 0 }, @@ -336,13 +333,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 70%|███████ | 7/10 [00:36<00:13, 4.41s/it]" + "Generating images: 70%|███████ | 7/10 [02:11<00:24, 8.13s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2382da1c5de848dd8c4db0200448cdf3", + "model_id": "c09c5adc383e4e82b968caf1a6a9cc37", "version_major": 2, "version_minor": 0 }, @@ -357,13 +354,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 80%|████████ | 8/10 [00:40<00:08, 4.31s/it]" + "Generating images: 80%|████████ | 8/10 [02:15<00:13, 6.85s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1d466fe6bb9d4e3fbe1bafd339bfbfdc", + "model_id": "1febee5bc5db4bee90c4aff3924942a1", "version_major": 2, "version_minor": 0 }, @@ -378,13 +375,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 90%|█████████ | 9/10 [00:44<00:04, 4.21s/it]" + "Generating images: 90%|█████████ | 9/10 [02:19<00:06, 6.00s/it]" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a963d42464434619bb883fa16de2e899", + "model_id": "79404d67bd804f0a927eb9c19098919c", "version_major": 2, "version_minor": 0 }, @@ -399,7 +396,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Generating images: 100%|██████████| 10/10 [00:48<00:00, 4.89s/it]\n" + "Generating images: 100%|██████████| 10/10 [02:24<00:00, 14.40s/it]\n" ] } ], @@ -412,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -420,7 +417,7 @@ "output_type": "stream", "text": [ "(['aeroplane'], 'A photo of an aeroplane soaring over a cityscape at sunset, casting a golden glow over the rooftops and creating a stunning backdrop for the bustling metropolis below.')\n", - "\n" + "\n" ] }, { @@ -438,7 +435,7 @@ "output_type": "stream", "text": [ "(['car', 'aeroplane'], 'A photo of a car and a helicopter in the city skyline, showing the coexistence of man-made machines in urban life.')\n", - "\n" + "\n" ] }, { @@ -456,7 +453,7 @@ "output_type": "stream", "text": [ "(['unicorn'], 'A photo of majestic unicorns frolicking in a lush green meadow, surrounded by vibrant wildflowers and the clear blue sky.')\n", - "\n" + "\n" ] }, { @@ -474,7 +471,7 @@ "output_type": "stream", "text": [ "(['car'], 'A photo of car: A rusty old car sits abandoned in a fields, surrounded by towering wheat stalks. The cars doors are thrown open, revealing a worn-out interior. The scene is bathed in the golden light of the setting sun, casting shadows across the field.')\n", - "\n" + "\n" ] }, { @@ -492,7 +489,7 @@ "output_type": "stream", "text": [ "(['aeroplane'], 'A photo of an aeroplane soaring over a beautiful sunset and a bustling city.')\n", - "\n" + "\n" ] }, { @@ -510,7 +507,7 @@ "output_type": "stream", "text": [ "(['unicorn'], 'A photo of unicorns grazing in a serene meadow, their ethereal beauty and pureness illuminating the scene.')\n", - "\n" + "\n" ] }, { @@ -528,7 +525,7 @@ "output_type": "stream", "text": [ "(['aeroplane', 'unicorn'], 'A photo of an aeroplane and a unicorn soaring above the clouds in the sunset - A serene image of two majestic creatures, gracefully flying together in harmony.')\n", - "\n" + "\n" ] }, { @@ -546,7 +543,7 @@ "output_type": "stream", "text": [ "(['car'], 'A photo of a car. 
A sleek, red sports car speeds down a winding mountain road, surrounded by the breathtaking views of a scenic landscape.')\n", - "\n" + "\n" ] }, { @@ -564,7 +561,7 @@ "output_type": "stream", "text": [ "(['aeroplane', 'car'], 'A photo of aeroplane flying above a busy city, soaring high above the buildings and the cars below.')\n", - "\n" + "\n" ] }, { @@ -582,7 +579,7 @@ "output_type": "stream", "text": [ "(['person', 'aeroplane'], 'A photo of a person waving goodbye to an aeroplane, as they bid farewell to a loved one who has left for a foreign land. The image captures the bittersweet moment of separation and the longing for a loved one to return, set against the backdrop of an aeroplane.')\n", - "\n" + "\n" ] }, { @@ -608,7 +605,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -617,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -638,7 +635,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -837,7 +834,8 @@ " prompts = prompt_objs\n", "\n", " # Perform object detection\n", - " boxes, scores, labels = annotator.annotate(image, prompts, conf_threshold=0.2, use_tta=True)\n", + " boxes_batch, scores_batch, labels_batch = annotator.annotate_batch([image], prompts, conf_threshold=0.2, use_tta=True)\n", + " boxes, scores, labels = boxes_batch[0], scores_batch[0], labels_batch[0]\n", "\n", " # Convert to numpy arrays\n", " if not isinstance(boxes, np.ndarray):\n", @@ -905,7 +903,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.7" } }, "nbformat": 4, From a9eb344527683b12604a050235c0d09e20d63dce Mon Sep 17 00:00:00 2001 From: Nikita Sokovnin Date: Fri, 23 Feb 2024 14:01:50 +0000 Subject: [PATCH 12/12] feature: replace owlv2 resize --- datadreamer/dataset_annotation/owlv2_annotator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index ff5d7db..c359612 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -60,7 +60,7 @@ def _init_processor(self): Owlv2Processor: The initialized processor. """ return Owlv2Processor.from_pretrained( - "google/owlv2-base-patch16-ensemble", do_pad=False + "google/owlv2-base-patch16-ensemble", do_pad=False, do_resize=False ) def _generate_annotations( @@ -83,6 +83,8 @@ def _generate_annotations( batched_prompts = [prompts] * n target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) + # resize the images to the model's input size + images = [images[i].resize((960, 960)) for i in range(n)] inputs = self.processor( text=batched_prompts, images=images, return_tensors="pt" ).to(self.device)