Switch to SlimSAM
HonzaCuhel committed Oct 24, 2024
1 parent 7879220 commit 4fae718
Showing 13 changed files with 73 additions and 149 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -157,13 +157,13 @@ datadreamer --config <path-to-config>

### 🔧 Additional Parameters

-- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`.
+- `--task`: Choose between detection, classification, instance segmentation and semantic segmentation. Default is `detection`.
- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.
- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.
- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.
- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.
- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.
-- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-fastsam` for instance segmentation. Default is `owlv2`.
+- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.
- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.
- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.
- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `""`.
@@ -199,7 +199,7 @@ datadreamer --config <path-to-config>
| | [SDXL-Lightning](https://huggingface.co/ByteDance/SDXL-Lightning) | Fast and accurate (1024x1024 images) |
| Image Annotation | [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Vocabulary object detector |
| | [CLIP](https://huggingface.co/openai/clip-vit-base-patch32) | Zero-shot-image-classification |
-| | [FastSAM](https://docs.ultralytics.com/models/fast-sam) | Zero-shot-instance-segmentation |
+| | [SlimSAM](https://huggingface.co/Zigeng/SlimSAM-uniform-50) | Zero-shot-instance-segmentation |

<a name="example"></a>

4 changes: 2 additions & 2 deletions datadreamer/dataset_annotation/__init__.py
@@ -1,14 +1,14 @@
from __future__ import annotations

from .clip_annotator import CLIPAnnotator
-from .fastsam_annotator import FastSAMAnnotator
from .image_annotator import BaseAnnotator, TaskList
from .owlv2_annotator import OWLv2Annotator
+from .slimsam_annotator import SlimSAMAnnotator

__all__ = [
"BaseAnnotator",
"TaskList",
"OWLv2Annotator",
"CLIPAnnotator",
"FastSAMAnnotator",
"SlimSAMAnnotator",
]
99 changes: 0 additions & 99 deletions datadreamer/dataset_annotation/fastsam_annotator.py

This file was deleted.
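The replacement module, `slimsam_annotator.py`, is added by this commit but its contents are collapsed in this view. Below is a minimal sketch of what a SlimSAM-based annotator could look like, assuming the Hugging Face `SamModel`/`SamProcessor` interface; the class name, the `annotate_batch` signature, and the `size` argument are taken from the pipeline and tests in this diff, while the size-to-checkpoint mapping is a guess (the README only links `Zigeng/SlimSAM-uniform-50`).

```python
from __future__ import annotations

from typing import List

import numpy as np
import torch
from PIL import Image
from transformers import SamModel, SamProcessor

from datadreamer.dataset_annotation.utils import mask_to_polygon


class SlimSAMAnnotator:
    """Annotates images with instance masks using SlimSAM, a pruned SAM.

    Box prompts are expected to come from an upstream detector
    (OWLv2 in the `owlv2-slimsam` pipeline).
    """

    def __init__(self, device: str = "cuda", size: str = "base") -> None:
        # ASSUMPTION: the size-to-checkpoint mapping below is hypothetical;
        # the README only references the uniform-50 checkpoint.
        checkpoint = (
            "Zigeng/SlimSAM-uniform-50"
            if size == "base"
            else "Zigeng/SlimSAM-uniform-77"
        )
        self.device = device
        self.model = SamModel.from_pretrained(checkpoint).to(device)
        self.processor = SamProcessor.from_pretrained(checkpoint)

    def annotate_batch(
        self,
        images: List[Image.Image],
        boxes_batch: List[np.ndarray],
        iou_threshold: float = 0.2,  # kept for parity with the pipeline call
    ) -> List[List[List[List[int]]]]:
        masks_batch = []
        for image, boxes in zip(images, boxes_batch):
            inputs = self.processor(
                image, input_boxes=[boxes.tolist()], return_tensors="pt"
            ).to(self.device)
            with torch.no_grad():
                outputs = self.model(**inputs, multimask_output=False)
            # Upscale the low-res mask logits back to the original image size
            masks = self.processor.image_processor.post_process_masks(
                outputs.pred_masks.cpu(),
                inputs["original_sizes"].cpu(),
                inputs["reshaped_input_sizes"].cpu(),
            )[0]
            # One binary mask per box prompt -> one polygon per instance
            masks_batch.append(
                [
                    mask_to_polygon(m.squeeze().numpy()) if m.any() else []
                    for m in masks
                ]
            )
        return masks_batch
```

In the pipeline changes below, `owlv2-slimsam` pairs OWLv2 (box proposals) with this class (mask refinement), which is why `annotate_batch` takes `boxes_batch` rather than running its own detection.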

25 changes: 25 additions & 0 deletions datadreamer/dataset_annotation/utils.py
@@ -2,6 +2,8 @@

from typing import List

+import cv2
+import numpy as np
from torchvision import transforms


@@ -32,3 +34,26 @@ def apply_tta(image) -> List[transforms.Compose]:

augmented_images = [t(image) for t in tta_transforms]
return augmented_images


def mask_to_polygon(mask: np.ndarray) -> List[List[int]]:
    """Converts a binary mask to a polygon.

    Args:
        mask: The binary mask to be converted.

    Returns:
        List: A list of vertices of the polygon.
    """
    # Find contours in the binary mask
    contours, _ = cv2.findContours(
        mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Find the contour with the largest area
    largest_contour = max(contours, key=cv2.contourArea)

    # Extract the vertices of the contour
    polygon = largest_contour.reshape(-1, 2).tolist()

    return polygon
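For illustration, a small usage sketch with hypothetical values; note the helper assumes the mask has at least one foreground contour, since `max()` over an empty sequence raises:

```python
import numpy as np

from datadreamer.dataset_annotation.utils import mask_to_polygon

# A 100x100 binary mask with a single filled rectangle
mask = np.zeros((100, 100), dtype=np.uint8)
mask[30:80, 20:60] = 1

polygon = mask_to_polygon(mask)
# With cv2.CHAIN_APPROX_SIMPLE the rectangle collapses to its corners,
# e.g. [[20, 30], [20, 79], [59, 79], [59, 30]]
print(polygon)
```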
9 changes: 4 additions & 5 deletions datadreamer/pipelines/generate_dataset_from_scratch.py
@@ -18,8 +18,8 @@

from datadreamer.dataset_annotation import (
CLIPAnnotator,
-    FastSAMAnnotator,
OWLv2Annotator,
+    SlimSAMAnnotator,
)
from datadreamer.image_generation import (
StableDiffusionImageGenerator,
@@ -58,8 +58,8 @@

det_annotators = {"owlv2": OWLv2Annotator}
clf_annotators = {"clip": CLIPAnnotator}
inst_seg_annotators = {"owlv2-fastsam": FastSAMAnnotator}
inst_seg_to_det = {"owlv2-fastsam": OWLv2Annotator}
inst_seg_annotators = {"owlv2-slimsam": SlimSAMAnnotator}
inst_seg_to_det = {"owlv2-slimsam": OWLv2Annotator}

setup_logging(use_rich=True)

@@ -122,7 +122,7 @@ def parse_args():
parser.add_argument(
"--image_annotator",
type=str,
choices=["owlv2", "clip", "owlv2-fastsam"],
choices=["owlv2", "clip", "owlv2-slimsam"],
help="Image annotator to use",
)

@@ -637,7 +637,6 @@ def read_image_batch(image_batch, batch_num, batch_size):
masks_batch = inst_seg_annotator.annotate_batch(
images=images,
boxes_batch=boxes_batch,
-    conf_threshold=args.conf_threshold,
iou_threshold=args.annotation_iou_threshold,
)
segment_list.extend(masks_batch)
2 changes: 1 addition & 1 deletion datadreamer/utils/coco_converter.py
@@ -113,7 +113,7 @@ def process_data(
):
bbox = [box[0], box[1], box[2] - box[0], box[3] - box[1]]
segmentation = (
-    np.array(mask).reshape(-1).tolist()
+    np.array(mask).reshape(1, -1).tolist()
if mask is not None
else None
)
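For context, COCO stores `segmentation` as a list of polygons, each a flat `[x1, y1, x2, y2, ...]` list, which is what `reshape(1, -1)` produces from the `[[x, y], ...]` vertices; a sketch with hypothetical values:

```python
import numpy as np

# Vertices as returned by mask_to_polygon: [[x, y], ...]
mask = [[20, 30], [20, 79], [59, 79], [59, 30]]

# Old behaviour: one flat list -> [20, 30, 20, 79, 59, 79, 59, 30]
flat = np.array(mask).reshape(-1).tolist()

# New behaviour: a list containing one flat polygon, matching COCO's
# "segmentation": [[x1, y1, x2, y2, ...]] convention
coco_segmentation = np.array(mask).reshape(1, -1).tolist()
print(coco_segmentation)  # [[20, 30, 20, 79, 59, 79, 59, 30]]
```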
2 changes: 1 addition & 1 deletion datadreamer/utils/config.py
@@ -39,7 +39,7 @@ class Config(LuxonisConfig):
# Profanity filter arguments
disable_lm_filter: bool = False
# Annotation arguments
-image_annotator: Literal["owlv2", "clip", "owlv2-fastsam"] = "owlv2"
+image_annotator: Literal["owlv2", "clip", "owlv2-slimsam"] = "owlv2"
conf_threshold: float = 0.15
annotation_iou_threshold: float = 0.2
use_tta: bool = False
8 changes: 4 additions & 4 deletions datadreamer/utils/luxonis_dataset_converter.py
@@ -89,10 +89,10 @@ def dataset_generator():
masks = data[image_path]["masks"]
for mask, label in zip(masks, labels):
poly = []
-    for m in mask:
-        poly += [
-            (point[0] / width, point[1] / height) for point in m
-        ]
+    print(mask)
+    poly += [
+        (point[0] / width, point[1] / height) for point in mask
+    ]
yield {
"file": image_full_path,
"annotation": {
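The converter divides each vertex by the image dimensions so the polygon lands in normalized [0, 1] coordinates; with this change a mask is one polygon (a list of `[x, y]` points) rather than a list of polygons. A sketch with hypothetical values:

```python
# Pixel-space vertices (hypothetical) for an image of width 640, height 480
mask = [[20, 30], [20, 79], [59, 79], [59, 30]]
width, height = 640, 480

poly = [(point[0] / width, point[1] / height) for point in mask]
# [(0.03125, 0.0625), (0.03125, 0.164583...), ...]
```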
@@ -298,7 +298,7 @@
" --disable_lm_filter \\\n",
" --annotator_size base \\\n",
" --use_tta \\\n",
" --image_annotator owlv2-fastsam \\\n",
" --image_annotator owlv2-slimsam \\\n",
" --conf_threshold 0.2 \\\n",
" --seed 42"
]
7 changes: 3 additions & 4 deletions requirements.txt
@@ -1,6 +1,6 @@
torch>=2.0.0
torchvision>=0.16.0
-transformers>=4.37.0
+transformers>=4.45.2
diffusers>=0.24.0
compel>=2.0.0
tqdm>=4.0.0
@@ -12,7 +12,6 @@ accelerate>=0.25.0
scipy>=1.10.0
bitsandbytes>=0.42.0
nltk>=3.8.1
-luxonis-ml[all]>=0.3.0
+luxonis-ml[all]>=0.4.0
python-box>=7.1.1
-gcsfs>=2023.1.0
-ultralytics>=8.3.13
+gcsfs>=2023.1.0
4 changes: 2 additions & 2 deletions tests/core_tests/integration/test_pipeline.py
@@ -184,7 +184,7 @@ def test_cpu_simple_sdxl_turbo_config_instance_segmentation_pipeline():
f"datadreamer --task instance-segmentation "
f"--save_dir {target_folder} "
f"--num_objects_range 1 2 "
f"--image_annotator owlv2-fastsam "
f"--image_annotator owlv2-slimsam "
f"--config ./tests/core_tests/integration/sample_config.yaml "
f"--device cpu"
)
@@ -204,7 +204,7 @@ def test_cuda_simple_sdxl_turbo_config_instance_segmentation_pipeline():
f"datadreamer --task instance-segmentation "
f"--save_dir {target_folder} "
f"--num_objects_range 1 2 "
f"--image_annotator owlv2-fastsam "
f"--image_annotator owlv2-slimsam "
f"--config ./tests/core_tests/integration/sample_config.yaml "
f"--device cuda"
)
14 changes: 7 additions & 7 deletions tests/core_tests/unittests/test_annotators.py
@@ -8,7 +8,7 @@
from PIL import Image

from datadreamer.dataset_annotation.clip_annotator import CLIPAnnotator
-from datadreamer.dataset_annotation.fastsam_annotator import FastSAMAnnotator
+from datadreamer.dataset_annotation.slimsam_annotator import SlimSAMAnnotator
from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator

# Get the total disk space in GB
@@ -99,10 +99,10 @@ def test_cpu_clip_large_annotator():
_check_clip_annotator("cpu", size="large")


-def _check_fastsam_annotator(device: str, size: str = "base"):
+def _check_slimsam_annotator(device: str, size: str = "base"):
url = "https://ultralytics.com/images/bus.jpg"
im = Image.open(requests.get(url, stream=True).raw)
-    annotator = FastSAMAnnotator(device=device, size=size)
+    annotator = SlimSAMAnnotator(device=device, size=size)
masks = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])])
w, h = im.width, im.height
# Check that the masks are lists
@@ -124,28 +124,28 @@ def _check_fastsam_annotator(device: str, size: str = "base"):
reason="Test requires GPU and 16GB of HDD",
)
def test_cuda_fastsam_base_annotator():
_check_fastsam_annotator("cuda")
_check_slimsam_annotator("cuda")


@pytest.mark.skipif(
total_disk_space < 16,
reason="Test requires at least 16GB of HDD",
)
def test_cpu_fastsam_base_annotator():
_check_fastsam_annotator("cpu")
_check_slimsam_annotator("cpu")


@pytest.mark.skipif(
not torch.cuda.is_available() or total_disk_space < 16,
reason="Test requires GPU and 16GB of HDD",
)
def test_cuda_fastsam_large_annotator():
_check_fastsam_annotator("cuda", size="large")
_check_slimsam_annotator("cuda", size="large")


@pytest.mark.skipif(
total_disk_space < 16,
reason="Test requires at least 16GB of HDD",
)
def test_cpu_fastsam_large_annotator():
_check_fastsam_annotator("cpu", size="large")
_check_slimsam_annotator("cpu", size="large")