diff --git a/.github/workflows/gar-publish-dev.yaml b/.github/workflows/gar-publish-dev.yaml index cc1646e..9b0bb31 100644 --- a/.github/workflows/gar-publish-dev.yaml +++ b/.github/workflows/gar-publish-dev.yaml @@ -15,7 +15,7 @@ jobs: steps: - name: 'Checkout GitHub Action' - uses: actions/checkout@main + uses: actions/checkout@v4 - id: 'auth' name: 'Authenticate to Google Cloud' @@ -34,5 +34,5 @@ jobs: - name: 'Build Inventory Image' working-directory: . run: | - docker build --build-arg GITHUB_TOKEN=${{secrets.GHCR_PAT}} . --tag $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer:dev - docker push $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer --all-tags + docker build --build-arg GITHUB_TOKEN=${{secrets.GHCR_PAT}} --build-arg BRANCH=${{ github.ref_name }} . --tag $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer:dev + docker push $GAR_LOCATION-docker.pkg.dev/$PROJECT_ID/internal/datadreamer --all-tags \ No newline at end of file diff --git a/.github/workflows/gar-publish.yaml b/.github/workflows/gar-publish.yaml index 3f228c1..eb21edc 100644 --- a/.github/workflows/gar-publish.yaml +++ b/.github/workflows/gar-publish.yaml @@ -4,8 +4,6 @@ name: Deploy single image to GAR (Google Artifact Registry) on: workflow_dispatch: - release: - types: [published] env: PROJECT_ID: easyml-394818 GAR_LOCATION: us-central1 diff --git a/.github/workflows/ghcr-publish-manual.yaml b/.github/workflows/ghcr-publish-manual.yaml new file mode 100644 index 0000000..8419920 --- /dev/null +++ b/.github/workflows/ghcr-publish-manual.yaml @@ -0,0 +1,41 @@ +name: Manually deploy image to GHCR + +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to deploy' + required: true + default: 'dev' + +env: + GHCR_REGISTRY: ghcr.io + IMAGE_NAME: datadreamer + +jobs: + push-store: + name: Push the image to GHCR + runs-on: ubuntu-latest + + steps: + - name: 'Checkout GitHub Action' + uses: actions/checkout@v2 + with: + ref: ${{ inputs.branch }} # 
Checkout the selected branch + + - name: 'Extract short commit hash' + id: commit_hash + run: echo "short_hash=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT" + + - name: Docker login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: luxonis-ml + password: ${{ secrets.GHCR_PAT }} + + - name: 'Build and Push Image to GHCR' + run: | + docker build --build-arg GITHUB_TOKEN=${{secrets.GHCR_PAT}} --build-arg BRANCH=${{ inputs.branch }} . \ + --tag ghcr.io/luxonis/datadreamer:${{ steps.commit_hash.outputs.short_hash }} + docker push ghcr.io/luxonis/datadreamer --all-tags diff --git a/.github/workflows/ghcr-publish.yaml b/.github/workflows/ghcr-publish.yaml index f0d2539..7786c7c 100644 --- a/.github/workflows/ghcr-publish.yaml +++ b/.github/workflows/ghcr-publish.yaml @@ -1,4 +1,4 @@ -name: Docker Build and Publish +name: Deploy latest image to GHCR on release on: workflow_dispatch: diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 0b1aaf7..6f964ac 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -2,19 +2,23 @@ name: Tests on: pull_request: - branches: [ dev, main ] + branches: [ main ] paths: - 'datadreamer/**/**.py' - - 'tests/**/**.py' + - 'tests/core_tests/**/**.py' - .github/workflows/tests.yaml + workflow_dispatch: jobs: run_tests: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] + os: [buildjet-8vcpu-ubuntu-2204, windows-latest, macOS-latest] version: ['3.10', '3.11'] + exclude: + - os: buildjet-8vcpu-ubuntu-2204 + version: '3.11' runs-on: ${{ matrix.os }} @@ -31,46 +35,43 @@ jobs: cache: pip - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' run: | sudo apt update sudo apt install -y pandoc pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Install dependencies [Windows] if: matrix.os == 'windows-latest' run: | pip install -e .[dev] pip 
install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Install dependencies [macOS] if: matrix.os == 'macOS-latest' run: | pip install -e .[dev] pip install coverage-badge>=1.1.0 pytest-cov>=4.1.0 - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: pytest tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' + run: pytest tests/core_tests --cov=datadreamer --cov-report xml --junit-xml pytest.xml - name: Run tests [Windows, macOS] - if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml + if: matrix.os != 'buildjet-8vcpu-ubuntu-2204' + run: pytest tests/core_tests --junit-xml pytest.xml - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' run: coverage-badge -o media/coverage_badge.svg -f - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' uses: orgoro/coverage@v3.1 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' run: | git config --global user.name 'GitHub Actions' git config --global user.email 'actions@github.com' @@ -78,9 +79,8 @@ jobs: git add media/coverage_badge.svg git commit -m "[Automated] Updated coverage badge" } - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + if: matrix.os == 'buildjet-8vcpu-ubuntu-2204' && matrix.version == '3.10' uses: ad-m/github-push-action@master with: branch: ${{ github.head_ref }} @@ -117,4 +117,4 @@ jobs: - name: Publish Test Results uses: 
EnricoMi/publish-unit-test-result-action@v2 with: - files: "artifacts/**/*.xml" \ No newline at end of file + files: "artifacts/**/*.xml" diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml new file mode 100644 index 0000000..59de92a --- /dev/null +++ b/.github/workflows/unit-tests.yaml @@ -0,0 +1,116 @@ +name: Unit tests + +on: + pull_request: + branches: [ dev ] + paths: + - 'datadreamer/**/**.py' + - 'tests/core_tests/unittests/**.py' + - .github/workflows/unit-tests.yaml + +jobs: + run_tests: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + version: ['3.10', '3.11'] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.version }} + cache: pip + + - name: Install dependencies [Ubuntu] + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install -y pandoc + pip install -e .[dev] + pip install "coverage-badge>=1.1.0" "pytest-cov>=4.1.0" + - name: Install dependencies [Windows] + if: matrix.os == 'windows-latest' + run: | + pip install -e .[dev] + pip install "coverage-badge>=1.1.0" "pytest-cov>=4.1.0" + - name: Install dependencies [macOS] + if: matrix.os == 'macOS-latest' + run: | + pip install -e .[dev] + pip install "coverage-badge>=1.1.0" "pytest-cov>=4.1.0" + - name: Run tests with coverage [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: pytest tests/core_tests/unittests --cov=datadreamer --cov-report xml --junit-xml pytest.xml + + - name: Run tests [Windows, macOS] + if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' + run: pytest tests/core_tests/unittests --junit-xml pytest.xml + + - name: Generate coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: coverage-badge -o media/coverage_badge.svg -f + + - name: Generate coverage 
report [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Commit coverage badge [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } + - name: Push changes [Ubuntu] + if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' + uses: ad-m/github-push-action@master + with: + branch: ${{ github.head_ref }} + + - name: Upload Test Results + if: always() + uses: actions/upload-artifact@v4 + with: + name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) + path: pytest.xml + retention-days: 10 + if-no-files-found: error + + publish-test-results: + name: "Publish Tests Results" + needs: run_tests + runs-on: ubuntu-latest + permissions: + checks: write + pull-requests: write + if: always() + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + with: + files: "artifacts/**/*.xml" diff --git a/Dockerfile b/Dockerfile index 20f3905..94eacea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,9 +7,14 @@ WORKDIR /app ## instal RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y RUN apt-get install -y git -RUN git clone https://github.com/luxonis/datadreamer.git -b main + +## Define a build argument for the branch, defaulting to "main" +ARG BRANCH=main + +## Clone the repository with the specified branch +RUN git clone --branch ${BRANCH} https://github.com/luxonis/datadreamer.git RUN cd datadreamer && pip install . 
## define image execution -ENTRYPOINT ["datadreamer"] \ No newline at end of file +ENTRYPOINT ["datadreamer"] diff --git a/README.md b/README.md index f15831f..17921df 100644 --- a/README.md +++ b/README.md @@ -157,13 +157,13 @@ datadreamer --config ### 🔧 Additional Parameters -- `--task`: Choose between detection and classification. Default is `detection`. +- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`. - `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`. - `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`. - `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3. -- `--prompt_generator`: Choose between `simple`, `lm` (language model) and `tiny` (tiny LM). Default is `simple`. +- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`. - `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`. -- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`. +- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`. - `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`. - `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`. - `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `""`. @@ -175,6 +175,8 @@ datadreamer --config - `--image_tester_patience`: Patience level for image tester. Default is `1`. - `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`. 
- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`. +- `--disable_lm_filter`: Use only a bad word list for profanity filtering. Default is `False`. +- `--keep_unlabeled_images`: Whether to keep images without any annotations. Default is `False`. - `--batch_size_prompt`: Batch size for prompt generation. Default is 64. - `--batch_size_annotation`: Batch size for annotation. Default is `1`. - `--batch_size_image`: Batch size for image generation. Default is `1`. @@ -190,12 +192,15 @@ datadreamer --config | ----------------- | ------------------------------------------------------------------------------------- | --------------------------------------- | | Prompt Generation | [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) | Semantically rich prompts | | | [TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) | Tiny LM | +| | [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | Qwen2.5 LM | | | Simple random generator | Joins randomly chosen object names | +| Profanity Filter | [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | Fast and accurate LM profanity filter | | Image Generation | [SDXL-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) | Slow and accurate (1024x1024 images) | | | [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo) | Fast and less accurate (512x512 images) | | | [SDXL-Lightning](https://huggingface.co/ByteDance/SDXL-Lightning) | Fast and accurate (1024x1024 images) | | Image Annotation | [OWLv2](https://huggingface.co/google/owlv2-base-patch16-ensemble) | Open-Vocabulary object detector | | | [CLIP](https://huggingface.co/openai/clip-vit-base-patch32) | Zero-shot-image-classification | +| | [SlimSAM](https://huggingface.co/Zigeng/SlimSAM-uniform-50) | Zero-shot-instance-segmentation | @@ -268,6 +273,23 @@ save_dir/ } ``` +3. 
Instance Segmentation Annotations (instance_segmentation_annotations.json): + +- Each entry corresponds to an image and contains bounding boxes, masks and labels for objects in the image. +- Format: + +```bash +{ + "image_path": { + "boxes": [[x_min, y_min, x_max, y_max], ...], + "masks": [[[x0, y0],[x1, y1],...], [[x0, y0],[x1, y1],...], ...], + "labels": [label_index, ...] + }, + ... + "class_names": ["class1", "class2", ...] +} +``` + ## ⚠️ Limitations @@ -292,7 +314,7 @@ The above license does not cover the models. Please see the license of each mode ## 🙏 Acknowledgements -This library was made possible by the use of several open-source projects, including Transformers, Diffusers, and others listed in the requirements.txt. +This library was made possible by the use of several open-source projects, including Transformers, Diffusers, and others listed in the requirements.txt. Furthermore, we utilized a bad words list from the [`@coffeeandfun/google-profanity-words`](https://github.com/coffee-and-fun/google-profanity-words) Node.js module created by Robert James Gabriel from Coffee & Fun LLC. 
[SD-XL 1.0 License](https://github.com/Stability-AI/generative-models/blob/main/model_licenses/LICENSE-SDXL1.0) [SDXL-Turbo License](https://github.com/Stability-AI/generative-models/blob/main/model_licenses/LICENSE-SDXL-Turbo) diff --git a/datadreamer/dataset_annotation/__init__.py b/datadreamer/dataset_annotation/__init__.py index f4da035..cfdf51a 100644 --- a/datadreamer/dataset_annotation/__init__.py +++ b/datadreamer/dataset_annotation/__init__.py @@ -3,5 +3,12 @@ from .clip_annotator import CLIPAnnotator from .image_annotator import BaseAnnotator, TaskList from .owlv2_annotator import OWLv2Annotator +from .slimsam_annotator import SlimSAMAnnotator -__all__ = ["BaseAnnotator", "TaskList", "OWLv2Annotator", "CLIPAnnotator"] +__all__ = [ + "BaseAnnotator", + "TaskList", + "OWLv2Annotator", + "CLIPAnnotator", + "SlimSAMAnnotator", +] diff --git a/datadreamer/dataset_annotation/clip_annotator.py b/datadreamer/dataset_annotation/clip_annotator.py index ff7b9aa..a39d1c6 100644 --- a/datadreamer/dataset_annotation/clip_annotator.py +++ b/datadreamer/dataset_annotation/clip_annotator.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import List +import logging +from typing import Dict, List import numpy as np import PIL @@ -10,6 +11,8 @@ from datadreamer.dataset_annotation.image_annotator import BaseAnnotator, TaskList +logger = logging.getLogger(__name__) + class CLIPAnnotator(BaseAnnotator): """A class for image annotation using the CLIP model, specializing in image @@ -47,7 +50,7 @@ def __init__( self.device = device self.model.to(self.device) - def _init_processor(self): + def _init_processor(self) -> CLIPProcessor: """Initializes the CLIP processor. Returns: @@ -57,12 +60,13 @@ def _init_processor(self): return CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14") return CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32") - def _init_model(self): + def _init_model(self) -> CLIPModel: """Initializes the CLIP model. 
Returns: CLIPModel: The initialized CLIP model. """ + logger.info(f"Initializing CLIP {self.size} model...") if self.size == "large": return CLIPModel.from_pretrained("openai/clip-vit-large-patch14") return CLIPModel.from_pretrained("openai/clip-vit-base-patch32") @@ -72,7 +76,7 @@ def annotate_batch( images: List[PIL.Image.Image], objects: List[str], conf_threshold: float = 0.1, - synonym_dict: dict[str, List[str]] | None = None, + synonym_dict: Dict[str, List[str]] | None = None, ) -> List[np.ndarray]: """Annotates images using the OWLv2 model. @@ -83,7 +87,7 @@ def annotate_batch( synonym_dict (dict, optional): Dictionary for handling synonyms in labels. Defaults to None. Returns: - List[List[int]]: A list of lists of labels for each image. + List[np.ndarray]: A list of the annotations for each image. """ if synonym_dict is not None: objs_syn = set() diff --git a/datadreamer/dataset_annotation/image_annotator.py b/datadreamer/dataset_annotation/image_annotator.py index 4479ffe..757baab 100644 --- a/datadreamer/dataset_annotation/image_annotator.py +++ b/datadreamer/dataset_annotation/image_annotator.py @@ -4,15 +4,12 @@ from abc import ABC, abstractmethod -# Enum for different labeling tasks class TaskList(enum.Enum): CLASSIFICATION = "classification" OBJECT_DETECTION = "object_detection" SEGMENTATION = "segmentation" - # Add more tasks as needed -# Abstract base class for data labeling class BaseAnnotator(ABC): """Abstract base class for creating annotators. @@ -24,6 +21,8 @@ class BaseAnnotator(ABC): Methods: annotate_batch(): Abstract method to be implemented by subclasses. It should contain the logic for performing annotation based on the task definition. + release(): Abstract method to be implemented by subclasses. It should contain + the logic for releasing the resources used by the annotator. 
""" def __init__( @@ -35,3 +34,7 @@ def __init__( @abstractmethod def annotate_batch(self): pass + + @abstractmethod + def release(self, empty_cuda_cache=False) -> None: + pass diff --git a/datadreamer/dataset_annotation/owlv2_annotator.py b/datadreamer/dataset_annotation/owlv2_annotator.py index 1d4243c..9da41b4 100644 --- a/datadreamer/dataset_annotation/owlv2_annotator.py +++ b/datadreamer/dataset_annotation/owlv2_annotator.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import List, Tuple +import logging +from typing import Dict, List, Tuple import numpy as np import PIL @@ -11,6 +12,8 @@ from datadreamer.dataset_annotation.utils import apply_tta from datadreamer.utils.nms import non_max_suppression +logger = logging.getLogger(__name__) + class OWLv2Annotator(BaseAnnotator): """A class for image annotation using the OWLv2 model, specializing in object @@ -48,12 +51,13 @@ def __init__( self.device = device self.model.to(self.device) - def _init_model(self): + def _init_model(self) -> Owlv2ForObjectDetection: """Initializes the OWLv2 model for object detection. Returns: Owlv2ForObjectDetection: The initialized OWLv2 model. """ + logger.info(f"Initializing OWLv2 {self.size} model...") if self.size == "large": return Owlv2ForObjectDetection.from_pretrained( "google/owlv2-large-patch14-ensemble" @@ -62,7 +66,7 @@ def _init_model(self): "google/owlv2-base-patch16-ensemble" ) - def _init_processor(self): + def _init_processor(self) -> Owlv2Processor: """Initializes the processor for the OWLv2 model. Returns: @@ -81,7 +85,7 @@ def _generate_annotations( images: List[PIL.Image.Image], prompts: List[str], conf_threshold: float = 0.1, - ) -> List[dict[str, torch.Tensor]]: + ) -> List[Dict[str, torch.Tensor]]: """Generates annotations for the given images and prompts. Args: @@ -90,11 +94,11 @@ def _generate_annotations( conf_threshold (float, optional): Confidence threshold for the annotations. Defaults to 0.1. 
Returns: - dict: A dictionary containing the annotations for the images. + List[Dict[str, torch.Tensor]]: The annotations for the given images and prompts. """ n = len(images) batched_prompts = [prompts] * n - target_sizes = torch.Tensor(images[0].size[::-1]).repeat((n, 1)).to(self.device) + target_sizes = torch.Tensor([img.size[::-1] for img in images]).to(self.device) # resize the images to the model's input size img_size = (1008, 1008) if self.size == "large" else (960, 960) @@ -104,11 +108,9 @@ def _generate_annotations( images=images, return_tensors="pt", padding="max_length", - truncation=True, ).to(self.device) with torch.no_grad(): outputs = self.model(**inputs) - # print(outputs) preds = self.processor.post_process_object_detection( outputs=outputs, target_sizes=target_sizes, threshold=conf_threshold ) @@ -117,18 +119,20 @@ def _generate_annotations( def _get_annotations( self, - pred: dict[str, torch.Tensor], + pred: Dict[str, torch.Tensor], use_tta: bool, - img_dim: int, - synonym_dict: dict[str, List[str]] | None, - synonym_dict_rev: dict[int, int] | None, + img_width: int, + img_height: int, + synonym_dict: Dict[str, List[str]] | None, + synonym_dict_rev: Dict[int, int] | None, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """Extracts the annotations from the predictions. Args: pred: The predictions from the model. use_tta (bool): Flag to whether the test-time augmentation was applied. - img_dim (int): The dimension of the image. + img_width (int): The width of the image. + img_height (int): The height of the image. synonym_dict (dict): Dictionary for handling synonyms in labels. synonym_dict_rev (dict): Dictionary for handling synonyms in labels. 
@@ -141,17 +145,44 @@ def _get_annotations( pred["scores"], pred["labels"], ) - # Flip boxes back if using TTA - if use_tta: - boxes[:, [0, 2]] = img_dim - boxes[:, [2, 0]] if synonym_dict is not None: labels = torch.tensor( [synonym_dict_rev[label.item()] for label in labels], dtype=torch.int64 ) + boxes = self._correct_bboxes_misalignment(boxes, img_width, img_height) + + # Flip boxes back if using TTA + if use_tta: + boxes[:, [0, 2]] = img_width - boxes[:, [2, 0]] + return boxes, scores, labels + def _correct_bboxes_misalignment( + self, input_boxes: torch.Tensor, width: int, height: int + ) -> torch.Tensor: + """This function corrects the bounding boxes misalignment appearing when using + the `transformers==4.45.2`. + + Problem description: With a non-square aspect ratio, the predictions are shifted in the smaller dimension. + Solution: https://discuss.huggingface.co/t/owl-v2-bounding-box-misalignment-problem/66181 + + Args: + input_boxes (torch.Tensor): The bounding boxes to be corrected. + width (int): The width of the image. + height (int): The height of the image. + + Returns: + torch.Tensor: The corrected bounding boxes (a new tensor; the input is not modified). + """ + width_ratio = width / height if width < height else 1 + height_ratio = height / width if height < width else 1 + ratios = torch.tensor( + [width_ratio, height_ratio] * 2, device=input_boxes.device + ) + return input_boxes * ratios + def annotate_batch( self, images: List[PIL.Image.Image], @@ -159,7 +190,7 @@ def annotate_batch( conf_threshold: float = 0.1, iou_threshold: float = 0.2, use_tta: bool = False, - synonym_dict: dict[str, List[str]] | None = None, + synonym_dict: Dict[str, List[str]] | None = None, ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]: """Annotates images using the OWLv2 model. 
@@ -204,31 +235,34 @@ def annotate_batch( final_labels = [] for i, (pred, aug_pred) in enumerate(zip(preds, augmented_preds)): + img_width, img_height = images[i].size boxes, scores, labels = self._get_annotations( pred, False, - images[i].size[0], + img_width, + img_height, synonym_dict, synonym_dict_rev if synonym_dict is not None else None, ) - all_boxes = [boxes.to("cpu")] - all_scores = [scores.to("cpu")] - all_labels = [labels.to("cpu")] + all_boxes = [boxes.cpu()] + all_scores = [scores.cpu()] + all_labels = [labels.cpu()] # Flip boxes back if using TTA if use_tta: aug_boxes, aug_scores, aug_labels = self._get_annotations( aug_pred, True, - images[i].size[0], + img_width, + img_height, synonym_dict, synonym_dict_rev if synonym_dict is not None else None, ) - all_boxes.append(aug_boxes.to("cpu")) - all_scores.append(aug_scores.to("cpu")) - all_labels.append(aug_labels.to("cpu")) + all_boxes.append(aug_boxes.cpu()) + all_scores.append(aug_scores.cpu()) + all_labels.append(aug_labels.cpu()) one_hot_labels = torch.nn.functional.one_hot( torch.cat(all_labels), num_classes=len(prompts) @@ -292,8 +326,8 @@ def release(self, empty_cuda_cache: bool = False) -> None: url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) - annotator = OWLv2Annotator(device="cpu", size="large") + annotator = OWLv2Annotator(device="cpu", size="base") final_boxes, final_scores, final_labels = annotator.annotate_batch( - [im], ["robot", "horse"] + [im], ["bus", "person"] ) annotator.release() diff --git a/datadreamer/dataset_annotation/slimsam_annotator.py b/datadreamer/dataset_annotation/slimsam_annotator.py new file mode 100644 index 0000000..7f6de7b --- /dev/null +++ b/datadreamer/dataset_annotation/slimsam_annotator.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import logging +from typing import List + +import numpy as np +import PIL +import torch +from transformers import SamModel, SamProcessor + +from 
datadreamer.dataset_annotation.image_annotator import BaseAnnotator +from datadreamer.dataset_annotation.utils import mask_to_polygon + +logger = logging.getLogger(__name__) + + +class SlimSAMAnnotator(BaseAnnotator): + """A class for image annotation using the SlimSAM model, specializing in instance + segmentation. + + Attributes: + model (SamModel): The SAM model for instance segmentation. + processor (SamProcessor): The processor for the SAM model. + device (str): The device on which the model will run ('cuda' for GPU, 'cpu' for CPU). + size (str): The size of the SAM model to use ('base' or 'large'). + + Methods: + _init_model(): Initializes the SAM model. + _init_processor(): Initializes the processor for the SAM model. + annotate_batch(images, boxes_batch, iou_threshold): Annotates the given images with segmentation masks (as polygons) for the given bounding boxes. + release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. + """ + + def __init__( + self, + seed: float = 42, + device: str = "cuda", + size: str = "base", + ) -> None: + """Initializes the SlimSAMAnnotator with a specific seed, device and model size. + + Args: + seed (float): Seed for reproducibility. Defaults to 42. + device (str): The device to run the model on. Defaults to 'cuda'. + """ + super().__init__(seed) + self.size = size + self.model = self._init_model() + self.processor = self._init_processor() + self.device = device + self.model.to(self.device) + + def _init_model(self) -> SamModel: + """Initializes the SAM model for instance segmentation. + + Returns: + SamModel: The initialized SAM model. + """ + logger.info(f"Initializing SlimSAM {self.size} model...") + if self.size == "large": + return SamModel.from_pretrained("Zigeng/SlimSAM-uniform-50") + return SamModel.from_pretrained("Zigeng/SlimSAM-uniform-77") + + def _init_processor(self) -> SamProcessor: + """Initializes the processor for the SAM model. + + Returns: + SamProcessor: The initialized processor. 
+ """ + if self.size == "large": + return SamProcessor.from_pretrained("Zigeng/SlimSAM-uniform-50") + return SamProcessor.from_pretrained("Zigeng/SlimSAM-uniform-77") + + def annotate_batch( + self, + images: List[PIL.Image.Image], + boxes_batch: List[np.ndarray], + iou_threshold: float = 0.2, + ) -> List[List[List[float]]]: + """Annotates images for the task of instance segmentation using the SlimSAM + model. + + Args: + images: The images to be annotated. + boxes_batch: The bounding boxes of found objects. + iou_threshold (float, optional): Intersection over union threshold for non-maximum suppression. Defaults to 0.2. + + Returns: + List: A list containing the final segment masks represented as a polygon. + """ + final_segments = [] + + n = len(images) + + for i in range(n): + boxes = boxes_batch[i].tolist() + if len(boxes) == 0: + final_segments.append([]) + continue + + inputs = self.processor( + images[i], input_boxes=[boxes], return_tensors="pt" + ).to(self.device) + + with torch.no_grad(): + outputs = self.model(**inputs, return_dict=True) + + masks = self.processor.image_processor.post_process_masks( + outputs.pred_masks.cpu(), + inputs["original_sizes"].cpu(), + inputs["reshaped_input_sizes"].cpu(), + )[0] + + iou_scores = outputs.iou_scores.cpu() + + image_masks = [] + for j in range(len(boxes)): + keep_idx = iou_scores[0, j] >= iou_threshold + filtered_masks = masks[j, keep_idx].cpu().float() + final_masks = filtered_masks.permute(1, 2, 0) + final_masks = final_masks.mean(axis=-1) + final_masks = (final_masks > 0).int() + final_masks = final_masks.numpy().astype(np.uint8) + polygon = mask_to_polygon(final_masks) + if len(polygon) != 0: + image_masks.append(polygon) + + final_segments.append(image_masks) + + return final_segments + + def release(self, empty_cuda_cache: bool = False) -> None: + """Releases the model and optionally empties the CUDA cache. + + Args: + empty_cuda_cache (bool, optional): Whether to empty the CUDA cache. Defaults to False. 
+ """ + self.model = self.model.to("cpu") + if empty_cuda_cache: + with torch.no_grad(): + torch.cuda.empty_cache() + + +if __name__ == "__main__": + import requests + from PIL import Image + + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = SlimSAMAnnotator(device="cpu", size="large") + final_segments = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + print(len(final_segments), len(final_segments[0])) + print(final_segments[0][0][:5]) diff --git a/datadreamer/dataset_annotation/utils.py b/datadreamer/dataset_annotation/utils.py index 942d1a4..dd6b643 100644 --- a/datadreamer/dataset_annotation/utils.py +++ b/datadreamer/dataset_annotation/utils.py @@ -1,9 +1,13 @@ from __future__ import annotations +from typing import List + +import cv2 +import numpy as np from torchvision import transforms -def apply_tta(image): +def apply_tta(image) -> List[transforms.Compose]: """Apply test-time augmentation (TTA) to the given image. Args: @@ -30,3 +34,27 @@ def apply_tta(image): augmented_images = [t(image) for t in tta_transforms] return augmented_images + + +def mask_to_polygon(mask: np.ndarray) -> List[List[int]]: + """Converts a binary mask to a polygon. + + Args: + mask: The binary mask to be converted. + + Returns: + List: A list of vertices of the polygon. 
+ """ + # Find contours in the binary mask + contours, _ = cv2.findContours( + mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + if len(contours) == 0: + return [] + # Find the contour with the largest area + largest_contour = max(contours, key=cv2.contourArea) + + # Extract the vertices of the contour + polygon = largest_contour.reshape(-1, 2).tolist() + + return polygon diff --git a/datadreamer/image_generation/clip_image_tester.py b/datadreamer/image_generation/clip_image_tester.py index 2c67965..8147533 100644 --- a/datadreamer/image_generation/clip_image_tester.py +++ b/datadreamer/image_generation/clip_image_tester.py @@ -1,11 +1,14 @@ from __future__ import annotations -from typing import List +import logging +from typing import List, Tuple import torch from PIL import Image from transformers import CLIPModel, CLIPProcessor +logger = logging.getLogger(__name__) + class ClipImageTester: """A class for testing images against a set of textual objects using the CLIP model. @@ -22,6 +25,7 @@ class ClipImageTester: def __init__(self, device: str = "cuda") -> None: """Initializes the ClipImageTester with the CLIP model and processor.""" + logger.info("Initializing CLIP image tester...") self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") self.clip_processor = CLIPProcessor.from_pretrained( "openai/clip-vit-base-patch32" @@ -29,7 +33,9 @@ def __init__(self, device: str = "cuda") -> None: self.device = device self.clip.to(self.device) - def test_image(self, image: Image.Image, objects: List[str], conf_threshold=0.05): + def test_image( + self, image: Image.Image, objects: List[str], conf_threshold: float = 0.05 + ) -> Tuple[bool, torch.Tensor, int]: """Tests the generated image against a set of objects using the CLIP model. 
Args: @@ -60,8 +66,8 @@ def test_images_batch( self, images: List[Image.Image], objects: List[List[str]], - conf_threshold=0.05, - ) -> List[tuple]: + conf_threshold: float = 0.05, + ) -> Tuple[List[bool], List[torch.Tensor], List[int]]: """Tests the generated images against a set of objects using the CLIP model. Args: @@ -70,8 +76,8 @@ def test_images_batch( conf_threshold (float, optional): Confidence threshold for considering an object as present. Defaults to 0.05. Returns: - List[tuple]: A list of tuples containing a boolean indicating if the image passes the test, - the probabilities of the objects, and the number of objects that passed the test. + Tuple[List[bool], List[torch.Tensor], List[int]]: A tuple containing a list of booleans indicating if the images pass the test, + a list of probabilities of the objects, and a list of the number of objects that passed the test. """ # Transform the inputs for the CLIP model objects_array = [] diff --git a/datadreamer/image_generation/image_generator.py b/datadreamer/image_generation/image_generator.py index 4b01f81..bfbc53d 100644 --- a/datadreamer/image_generation/image_generator.py +++ b/datadreamer/image_generation/image_generator.py @@ -30,7 +30,7 @@ class ImageGenerator: set_seed(seed): Sets the seed for random number generators. generate_images(prompts, prompt_objects): Generates images based on provided prompts and optional object prompts. release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. (Abstract method) - generate_image(prompt, negative_prompt, prompt_objects): Generates a single image based on the provided prompt. (Abstract method) + generate_images_batch(prompts, negative_prompt, prompt_objects): Generates a batch of images based on the provided prompts. (Abstract method) Note: The actual model for image generation needs to be defined in the subclass. 
@@ -64,7 +64,7 @@ def __init__( self.set_seed(seed) @staticmethod - def set_seed(seed: int): + def set_seed(seed: int) -> None: """Sets the seed for random number generators in Python and PyTorch. Args: @@ -78,7 +78,7 @@ def generate_images( self, prompts: Union[str, List[str]], prompt_objects: Optional[List[List[str]]] = None, - ): + ) -> List[Image.Image]: """Generates images based on the provided prompts and optional object prompts. Args: @@ -151,20 +151,20 @@ def release(self, empty_cuda_cache=False) -> None: pass @abstractmethod - def generate_image( + def generate_images_batch( self, - prompt: str, + prompts: List[str], negative_prompt: str, - prompt_objects: Optional[List[str]] = None, - ) -> Image.Image: - """Generates a single image based on the provided prompt. + prompt_objects: Optional[List[List[str]]] = None, + ) -> List[Image.Image]: + """Generates a batch of images based on the provided prompts. Args: - prompt (str): The positive prompt to guide image generation. + prompts (List[str]): A list of positive prompts to guide image generation. negative_prompt (str): The negative prompt to avoid certain features in the image. - prompt_objects (Optional[List[str]]): Optional list of objects to be used in CLIP model testing. + prompt_objects (Optional[List[List[str]]]): Optional list of objects to be used in CLIP model testing. Returns: - Image.Image: The generated image. + List[Image.Image]: A list of generated images. 
""" pass diff --git a/datadreamer/image_generation/sdxl_image_generator.py b/datadreamer/image_generation/sdxl_image_generator.py index 1882f4a..3c090de 100644 --- a/datadreamer/image_generation/sdxl_image_generator.py +++ b/datadreamer/image_generation/sdxl_image_generator.py @@ -1,13 +1,17 @@ from __future__ import annotations -from typing import List, Optional +import logging +from typing import List, Optional, Tuple import torch from compel import Compel, ReturnedEmbeddingsType from diffusers import DiffusionPipeline +from PIL import Image from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionImageGenerator(ImageGenerator): """A subclass of ImageGenerator that uses the Stable Diffusion model for image @@ -32,14 +36,14 @@ def __init__(self, *args, **kwargs): self.base, self.refiner = self._init_gen_model() self.base_processor, self.refiner_processor = self._init_processor() - def _init_gen_model(self): + def _init_gen_model(self) -> Tuple[DiffusionPipeline, DiffusionPipeline]: """Initializes the base and refiner models of Stable Diffusion. Returns: tuple: The base and refiner models. """ + logger.info(f"Initializing SDXL on {self.device}...") if self.device == "cpu": - print("Loading SDXL on CPU...") base = DiffusionPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", # variant="fp16", @@ -57,7 +61,6 @@ def _init_gen_model(self): ) refiner.to("cpu") else: - print("Loading SDXL on GPU...") base = DiffusionPipeline.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, @@ -77,7 +80,7 @@ def _init_gen_model(self): return base, refiner - def _init_processor(self): + def _init_processor(self) -> Tuple[Compel, Compel]: """Initializes the processors for the base and refiner models. 
Returns: @@ -102,7 +105,7 @@ def generate_images_batch( prompts: List[str], negative_prompt: str, prompt_objects: Optional[List[List[str]]] = None, - ): + ) -> List[Image.Image]: """Generates a batch of images based on the provided prompts. Args: diff --git a/datadreamer/image_generation/sdxl_lightning_image_generator.py b/datadreamer/image_generation/sdxl_lightning_image_generator.py index 33c5141..f4520e4 100644 --- a/datadreamer/image_generation/sdxl_lightning_image_generator.py +++ b/datadreamer/image_generation/sdxl_lightning_image_generator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from typing import List, Optional import torch @@ -15,6 +16,8 @@ from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionLightningImageGenerator(ImageGenerator): """A subclass of ImageGenerator specifically designed to use the Stable Diffusion @@ -37,7 +40,7 @@ def __init__(self, *args, **kwargs): self.pipe = self._init_gen_model() self.compel = self._init_compel() - def _init_gen_model(self): + def _init_gen_model(self) -> StableDiffusionXLPipeline: """Initializes the Stable Diffusion Lightning model for image generation. Returns: @@ -46,16 +49,15 @@ def _init_gen_model(self): base = "stabilityai/stable-diffusion-xl-base-1.0" repo = "ByteDance/SDXL-Lightning" ckpt = "sdxl_lightning_4step_unet.safetensors" # Use the correct ckpt for your step setting! + config = UNet2DConditionModel.load_config(base, subfolder="unet") - # Load model. 
+ logger.info(f"Initializing SDXL Lightning on {self.device}...") if self.device == "cpu": - print("Loading SDXL Lightning on CPU...") - unet = UNet2DConditionModel.from_config(base, subfolder="unet") + unet = UNet2DConditionModel.from_config(config) unet.load_state_dict(load_file(hf_hub_download(repo, ckpt))) pipe = StableDiffusionXLPipeline.from_pretrained(base, unet=unet) else: - print("Loading SDXL Lightning on GPU...") - unet = UNet2DConditionModel.from_config(base, subfolder="unet").to( + unet = UNet2DConditionModel.from_config(config).to( self.device, torch.float16 ) unet.load_state_dict( @@ -73,7 +75,7 @@ def _init_gen_model(self): return pipe - def _init_compel(self): + def _init_compel(self) -> Compel: """Initializes the Compel model for text prompt weighting. Returns: @@ -92,7 +94,6 @@ def generate_images_batch( prompts: List[str], negative_prompt: str, prompt_objects: Optional[List[List[str]]] = None, - batch_size: int = 1, ) -> List[Image.Image]: """Generates a batch of images using the Stable Diffusion Lightning model based on the provided prompts. @@ -101,7 +102,6 @@ def generate_images_batch( prompts (List[str]): A list of positive prompts to guide image generation. negative_prompt (str): The negative prompt to avoid certain features in the image. prompt_objects (Optional[List[List[str]]]): Optional list of objects for each prompt for CLIP model testing. - batch_size (int): The number of images to generate in each batch. Returns: List[Image.Image]: A list of generated images. 
diff --git a/datadreamer/image_generation/sdxl_turbo_image_generator.py b/datadreamer/image_generation/sdxl_turbo_image_generator.py index e78fa17..72cb2be 100644 --- a/datadreamer/image_generation/sdxl_turbo_image_generator.py +++ b/datadreamer/image_generation/sdxl_turbo_image_generator.py @@ -1,13 +1,17 @@ from __future__ import annotations +import logging from typing import List, Optional import torch +from compel import Compel, ReturnedEmbeddingsType from diffusers import AutoPipelineForText2Image from PIL import Image from datadreamer.image_generation.image_generator import ImageGenerator +logger = logging.getLogger(__name__) + class StableDiffusionTurboImageGenerator(ImageGenerator): """A subclass of ImageGenerator specifically designed to use the Stable Diffusion @@ -27,15 +31,16 @@ def __init__(self, *args, **kwargs): arguments.""" super().__init__(*args, **kwargs) self.base = self._init_gen_model() + self.compel = self._init_compel() - def _init_gen_model(self): + def _init_gen_model(self) -> AutoPipelineForText2Image: """Initializes the Stable Diffusion Turbo model for image generation. Returns: AutoPipelineForText2Image: The initialized Stable Diffusion Turbo model. """ + logger.info(f"Initializing SDXL Turbo on {self.device}...") if self.device == "cpu": - print("Loading SDXL Turbo on CPU...") base = AutoPipelineForText2Image.from_pretrained( "stabilityai/sdxl-turbo", # variant="fp16", @@ -44,7 +49,6 @@ def _init_gen_model(self): ) base.to("cpu") else: - print("Loading SDXL Turbo on GPU...") base = AutoPipelineForText2Image.from_pretrained( "stabilityai/sdxl-turbo", torch_dtype=torch.float16, @@ -55,6 +59,20 @@ def _init_gen_model(self): return base + def _init_compel(self) -> Compel: + """Initializes the Compel model for text prompt weighting. + + Returns: + Compel: The initialized Compel model. 
+ """ + compel = Compel( + tokenizer=[self.base.tokenizer, self.base.tokenizer_2], + text_encoder=[self.base.text_encoder, self.base.text_encoder_2], + returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, + requires_pooled=[False, True], + ) + return compel + def generate_images_batch( self, prompts: List[str], @@ -74,9 +92,18 @@ def generate_images_batch( Returns: List[Image.Image]: A list of generated images. """ + if prompt_objects is not None: + for i in range(len(prompt_objects)): + for obj in prompt_objects[i]: + prompts[i] = prompts[i].replace(obj, f"({obj})1.5", 1) + + conditioning, pooled = self.compel(prompts) + conditioning_neg, pooled_neg = self.compel([negative_prompt] * len(prompts)) images = self.base( - prompt=prompts, - negative_prompt=negative_prompt, + prompt_embeds=conditioning, + pooled_prompt_embeds=pooled, + negative_prompt_embeds=conditioning_neg, + negative_pooled_prompt_embeds=pooled_neg, guidance_scale=0.0, num_inference_steps=4, ).images diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index cc5750e..4d52acb 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -3,6 +3,7 @@ import argparse import os import shutil +import textwrap import uuid import matplotlib.patches as patches @@ -10,10 +11,16 @@ import numpy as np import torch from box import Box +from luxonis_ml.data import DATASETS_REGISTRY, LOADERS_REGISTRY +from luxonis_ml.utils import setup_logging from PIL import Image from tqdm import tqdm -from datadreamer.dataset_annotation import CLIPAnnotator, OWLv2Annotator +from datadreamer.dataset_annotation import ( + CLIPAnnotator, + OWLv2Annotator, + SlimSAMAnnotator, +) from datadreamer.image_generation import ( StableDiffusionImageGenerator, StableDiffusionLightningImageGenerator, @@ -22,6 +29,8 @@ from datadreamer.prompt_generation import ( 
LMPromptGenerator, LMSynonymGenerator, + ProfanityFilter, + Qwen2LMPromptGenerator, SimplePromptGenerator, TinyLlamaLMPromptGenerator, WordNetSynonymGenerator, @@ -33,6 +42,7 @@ "simple": SimplePromptGenerator, "lm": LMPromptGenerator, "tiny": TinyLlamaLMPromptGenerator, + "qwen2": Qwen2LMPromptGenerator, } synonym_generators = { @@ -48,6 +58,10 @@ det_annotators = {"owlv2": OWLv2Annotator} clf_annotators = {"clip": CLIPAnnotator} +inst_seg_annotators = {"owlv2-slimsam": SlimSAMAnnotator} +inst_seg_detectors = {"owlv2-slimsam": OWLv2Annotator} + +setup_logging(use_rich=True) def parse_args(): @@ -62,7 +76,7 @@ def parse_args(): parser.add_argument( "--task", type=str, - choices=["detection", "classification"], + choices=["detection", "classification", "instance-segmentation"], help="Task to generate data for", ) @@ -96,8 +110,8 @@ def parse_args(): parser.add_argument( "--prompt_generator", type=str, - choices=["simple", "lm", "tiny"], - help="Prompt generator to use: simple or language model", + choices=["simple", "lm", "tiny", "qwen2"], + help="Prompt generator to use: simple, lm, tiny, or qwen2 (default).", ) parser.add_argument( "--image_generator", @@ -108,7 +122,7 @@ def parse_args(): parser.add_argument( "--image_annotator", type=str, - choices=["owlv2", "clip"], + choices=["owlv2", "clip", "owlv2-slimsam"], help="Image annotator to use", ) @@ -196,6 +210,20 @@ def parse_args(): help="Size of the annotator model to use", ) + parser.add_argument( + "--disable_lm_filter", + default=None, + action="store_true", + help="Whether to use only bad words in profanity filter", + ) + + parser.add_argument( + "--keep_unlabeled_images", + default=None, + action="store_true", + help="Whether to keep images without any annotations", + ) + parser.add_argument( "--batch_size_prompt", type=int, @@ -227,6 +255,24 @@ def parse_args(): help="Path to the configuration file", ) + parser.add_argument( + "--dataset_plugin", + type=str, + help="LuxonisDataset plugin for the 
luxonis-dataset format", + ) + + parser.add_argument( + "--loader_plugin", + type=str, + help="Loader plugin for the LuxonisLoader", + ) + + parser.add_argument( + "--dataset_name", + type=str, + help="Name of the dataset to create if dataset_plugin or loader_plugin is used", + ) + parser.add_argument( "--seed", type=int, @@ -291,10 +337,10 @@ def check_args(args): if args.lm_quantization != "none" and ( args.device == "cpu" or not torch.cuda.is_available() - or args.prompt_generator != "lm" + or args.prompt_generator not in ["lm", "qwen2"] ): raise ValueError( - "LM Quantization is only available for CUDA devices and Mistral LM" + "LM Quantization is only available for CUDA devices and Mistral/Qwen2.5 prompt generators" ) # Check batch_size_prompt @@ -324,6 +370,14 @@ def check_args(args): "--image_annotator must be one of the available annotators for classification task" ) + if ( + args.task == "instance-segmentation" + and args.image_annotator not in inst_seg_annotators + ): + raise ValueError( + "--image_annotator must be one of the available annotators for instance segmentation task" + ) + # Check coorect task and dataset_format if args.task == "classification" and args.dataset_format in ["coco", "yolo"]: raise ValueError( @@ -335,6 +389,11 @@ def check_args(args): "--dataset_format must be one of the available dataset formats for detection task: raw, coco, yolo, luxonis-dataset" ) + if args.task == "instance-segmentation" and args.dataset_format in ["cls-single"]: + raise ValueError( + "--dataset_format must be one of the available dataset formats for instance segmentation task: raw, coco, yolo, luxonis-dataset" + ) + # Check split_ratios if ( len(args.split_ratios) != 3 @@ -345,6 +404,17 @@ def check_args(args): "--split_ratios must be a list of three floats that sum up to 1" ) + # Check if dataset_plugin is valid + if args.dataset_plugin: + if args.dataset_format != "luxonis-dataset": + raise ValueError( + "--dataset_format must be 'luxonis-dataset' if 
--dataset_plugin is specified" + ) + if args.dataset_plugin not in DATASETS_REGISTRY.module_dict: + raise ValueError( + f"Invalid dataset plugin: {args.dataset_plugin}. Available plugins: {list(DATASETS_REGISTRY.module_dict.keys())}" + ) + def main(): args = parse_args() @@ -355,6 +425,14 @@ def main(): # Check arguments check_args(args) + profanity_filter = ProfanityFilter( + seed=args.seed, device=args.device, use_lm=not args.disable_lm_filter + ) + # Check class names for bad words + if not profanity_filter.is_safe(args.class_names): + raise ValueError(f"Class names: '{args.class_names}' contain bad words!") + profanity_filter.release(empty_cuda_cache=True) + # Directories for saving images and bboxes save_dir = args.save_dir if not args.annotate_only: @@ -373,6 +451,12 @@ def main(): generated_prompts = None image_paths = [] + def split_image_paths(image_paths, batch_size): + return [ + image_paths[i : i + batch_size] + for i in range(0, len(image_paths), batch_size) + ] + if not args.annotate_only: # Prompt generation prompt_generator_class = prompt_generators[args.prompt_generator] @@ -421,12 +505,33 @@ def main(): image_generator.release(empty_cuda_cache=True) + # Split image_paths into batches + image_batches = split_image_paths(image_paths, args.batch_size_annotation) + else: - # Load image paths for annotation - for image_path in os.listdir(save_dir): - # Check file extension: jpg, png, jpeg - if image_path.lower().endswith((".jpg", ".png", ".jpeg", ".bmp", "webp")): - image_paths.append(os.path.join(save_dir, image_path)) + if args.loader_plugin: + if "DATASET_ID" in os.environ: + image_batches = LOADERS_REGISTRY.get(args.loader_plugin)( + view="all", + dataset_id=os.getenv("DATASET_ID"), + sync_target_directory=save_dir, + load_image_paths=True, + ) + else: + raise ValueError( + "DATASET_ID environment variable is not set for using the loader plugin" + ) + + else: + # Load image paths for annotation + for image_path in os.listdir(save_dir): + # Check 
file extension: jpg, png, jpeg + if image_path.lower().endswith( + (".jpg", ".png", ".jpeg", ".bmp", "webp") + ): + image_paths.append(os.path.join(save_dir, image_path)) + # Split image_paths into batches + image_batches = split_image_paths(image_paths, args.batch_size_annotation) # Synonym generation synonym_dict = None @@ -439,27 +544,48 @@ def main(): synonym_dict, os.path.join(save_dir, "synonyms.json") ) + def read_image_batch(image_batch, batch_num, batch_size): + if type(image_batch[0]) == np.ndarray: + images = [] + batch_image_paths = [] + for i, image in enumerate(image_batch[:-1]): + image = Image.fromarray(image) + unique_id = uuid.uuid4().hex + image_path = os.path.join( + save_dir, f"image_{batch_num * batch_size + i}_{unique_id}.jpg" + ) + image.save(image_path) + images.append(image) + batch_image_paths.append(image_path) + + else: + images = [ + Image.open(image_path).convert("RGB") for image_path in image_batch + ] + batch_image_paths = image_batch + return images, batch_image_paths + boxes_list = [] scores_list = [] labels_list = [] + segment_list = [] + image_paths = [] if args.task == "classification": # Classification annotation annotator_class = clf_annotators[args.image_annotator] annotator = annotator_class(device=args.device, size=args.annotator_size) - # Split image_paths into batches - image_batches = [ - image_paths[i : i + args.batch_size_annotation] - for i in range(0, len(image_paths), args.batch_size_annotation) - ] - - for image_batch in tqdm( - image_batches, + for i, image_batch in tqdm( + enumerate(image_batches), desc="Annotating images", total=len(image_batches), ): - images = [Image.open(image_path) for image_path in image_batch] + images, batch_image_paths = read_image_batch( + image_batch, i, args.batch_size_annotation + ) + image_paths.extend(batch_image_paths) + batch_labels = annotator.annotate_batch( images, args.class_names, @@ -485,21 +611,27 @@ def main(): seed=args.seed, ) else: - # Annotation - annotator_class = 
det_annotators[args.image_annotator] + # Detection annotation + if args.task == "detection": + annotator_class = det_annotators[args.image_annotator] + else: + annotator_class = inst_seg_detectors[args.image_annotator] + inst_seg_annotator_class = inst_seg_annotators[args.image_annotator] + inst_seg_annotator = inst_seg_annotator_class( + device=args.device, size=args.annotator_size + ) annotator = annotator_class(device=args.device, size=args.annotator_size) - # Split image_paths into batches - image_batches = [ - image_paths[i : i + args.batch_size_annotation] - for i in range(0, len(image_paths), args.batch_size_annotation) - ] for i, image_batch in tqdm( enumerate(image_batches), desc="Annotating images", total=len(image_batches), ): - images = [Image.open(image_path) for image_path in image_batch] + images, batch_image_paths = read_image_batch( + image_batch, i, args.batch_size_annotation + ) + image_paths.extend(batch_image_paths) + boxes_batch, scores_batch, local_labels_batch = annotator.annotate_batch( images, args.class_names, @@ -512,14 +644,31 @@ def main(): boxes_list.extend(boxes_batch) scores_list.extend(scores_batch) + if args.task == "instance-segmentation": + masks_batch = inst_seg_annotator.annotate_batch( + images=images, + boxes_batch=boxes_batch, + iou_threshold=args.annotation_iou_threshold, + ) + segment_list.extend(masks_batch) + for j, image in enumerate(images): labels = [] # Save bbox visualizations fig, ax = plt.subplots(1) ax.imshow(image) - for box, score, label in zip( - boxes_batch[j], scores_batch[j], local_labels_batch[j] - ): + for k in range(len(boxes_batch[j])): + box = boxes_batch[j][k] + score = scores_batch[j][k] + label = local_labels_batch[j][k] + + if args.task == "instance-segmentation": + if k < len(masks_batch[j]): + mask = masks_batch[j][k] + x_points, y_points = zip(*mask) + + ax.fill(x_points, y_points, label, alpha=0.5) + labels.append(label) x1, y1, x2, y2 = box rect = patches.Rectangle( @@ -540,17 +689,21 @@ def 
main(): ) # Add prompt text as title if generated_prompts: - plt.title(generated_prompts[i * args.batch_size_annotation + j][1]) + title = generated_prompts[i * args.batch_size_annotation + j][1] + wrapped_title = "\n".join(textwrap.wrap(title, width=50)) + plt.title(wrapped_title) else: plt.title("Annotated image") labels_list.append(np.array(labels)) + plt.axis("off") plt.savefig( os.path.join( bbox_dir, f"bbox_{i * args.batch_size_annotation + j}.jpg" ) ) + plt.close() # Save annotations as JSON files @@ -558,6 +711,7 @@ def main(): image_paths=image_paths, labels_list=labels_list, boxes_list=boxes_list, + masks_list=segment_list if len(segment_list) > 0 else None, class_names=args.class_names, save_dir=save_dir, ) @@ -570,6 +724,8 @@ def main(): "yolo", args.split_ratios, copy_files=False, + is_instance_segmentation=args.task == "instance-segmentation", + keep_unlabeled_images=args.keep_unlabeled_images, seed=args.seed, ) # Convert annotations to COCO format @@ -579,6 +735,8 @@ def main(): args.save_dir, "coco", args.split_ratios, + is_instance_segmentation=args.task == "instance-segmentation", + keep_unlabeled_images=args.keep_unlabeled_images, copy_files=False, seed=args.seed, ) @@ -590,6 +748,10 @@ def main(): args.save_dir, "luxonis-dataset", args.split_ratios, + dataset_plugin=args.dataset_plugin, + dataset_name=args.dataset_name, + is_instance_segmentation=args.task == "instance-segmentation", + keep_unlabeled_images=args.keep_unlabeled_images, copy_files=False, seed=args.seed, ) diff --git a/datadreamer/prompt_generation/__init__.py b/datadreamer/prompt_generation/__init__.py index 1e7f853..20a10ef 100644 --- a/datadreamer/prompt_generation/__init__.py +++ b/datadreamer/prompt_generation/__init__.py @@ -2,6 +2,8 @@ from .lm_prompt_generator import LMPromptGenerator from .lm_synonym_generator import LMSynonymGenerator +from .profanity_filter import ProfanityFilter +from .qwen2_lm_prompt_generator import Qwen2LMPromptGenerator from .simple_prompt_generator 
import SimplePromptGenerator from .tinyllama_lm_prompt_generator import TinyLlamaLMPromptGenerator from .wordnet_synonym_generator import WordNetSynonymGenerator @@ -10,6 +12,8 @@ "SimplePromptGenerator", "LMPromptGenerator", "LMSynonymGenerator", + "ProfanityFilter", "TinyLlamaLMPromptGenerator", + "Qwen2LMPromptGenerator", "WordNetSynonymGenerator", ] diff --git a/datadreamer/prompt_generation/lm_prompt_generator.py b/datadreamer/prompt_generation/lm_prompt_generator.py index 10ca96e..ba1dfd5 100644 --- a/datadreamer/prompt_generation/lm_prompt_generator.py +++ b/datadreamer/prompt_generation/lm_prompt_generator.py @@ -1,8 +1,9 @@ from __future__ import annotations +import logging import random import re -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Tuple import torch from tqdm import tqdm @@ -16,6 +17,8 @@ from datadreamer.prompt_generation.prompt_generator import PromptGenerator +logger = logging.getLogger(__name__) + class LMPromptGenerator(PromptGenerator): """A language model-based prompt generator class, extending PromptGenerator. @@ -62,15 +65,15 @@ def __init__( ) self.model, self.tokenizer, self.pipeline = self._init_lang_model() - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. Returns: tuple: The initialized language model, tokenizer and pipeline. 
""" selected_dtype = "auto" + logger.info(f"Initializing Mistral-7B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype="auto", @@ -79,7 +82,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin ) else: if self.quantization == "none": - print("Loading FP16 language model on GPU...") + logger.info("Loading FP16 language model...") selected_dtype = torch.float16 model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", @@ -88,7 +91,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, ) else: - print("Loading INT4 language model on GPU...") + logger.info("Loading INT4 language model...") # Create the BitsAndBytesConfig object with the dynamically constructed arguments bnb_config = BitsAndBytesConfig( load_in_4bit=True, @@ -115,7 +118,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, batch_size=self.batch_size, ) - print("Done!") return model, tokenizer, pipe def _remove_incomplete_sentence(self, text: str) -> str: @@ -186,9 +188,7 @@ def _test_prompt(self, prompt: str, selected_objects: List[str]) -> bool: Returns: bool: True if the prompt is valid, False otherwise. """ - return prompt.lower().startswith( - "a photo of" - ) # and all(obj.lower() in prompt.lower() for obj in selected_objects) + return prompt.lower().startswith("a photo of") def generate_prompts_batch(self, prompt_texts_batch: List[str]) -> List[str]: """Generates a list of prompts using the language model. 
@@ -219,7 +219,7 @@ def generate_prompts(self) -> List[str]: """ prompts = [] progress_bar = tqdm( - desc="Generating prompts...", position=0, total=self.prompts_number + desc="Generating prompts", position=0, total=self.prompts_number ) while len(prompts) < self.prompts_number: selected_objects_batch = [ diff --git a/datadreamer/prompt_generation/lm_synonym_generator.py b/datadreamer/prompt_generation/lm_synonym_generator.py index fc86db8..850ccfb 100644 --- a/datadreamer/prompt_generation/lm_synonym_generator.py +++ b/datadreamer/prompt_generation/lm_synonym_generator.py @@ -1,7 +1,8 @@ from __future__ import annotations +import logging import re -from typing import List, Optional +from typing import List, Optional, Tuple import torch from transformers import ( @@ -13,6 +14,8 @@ from datadreamer.prompt_generation.synonym_generator import SynonymGenerator +logger = logging.getLogger(__name__) + class LMSynonymGenerator(SynonymGenerator): """Synonym generator that generates synonyms for a list of words using a language @@ -42,14 +45,14 @@ def __init__( super().__init__(synonyms_number, seed, device) self.model, self.tokenizer, self.pipeline = self._init_lang_model() - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. Returns: tuple: The initialized language model, tokenizer and pipeline. 
""" + logger.info(f"Initializing Mistral-7B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype="auto", @@ -57,7 +60,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin low_cpu_mem_usage=True, ) else: - print("Loading FP16 language model on GPU...") + logger.info("Loading FP16 language model...") model = AutoModelForCausalLM.from_pretrained( "mistralai/Mistral-7B-Instruct-v0.1", torch_dtype=torch.float16, @@ -73,7 +76,7 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin torch_dtype=torch.float16 if self.device == "cuda" else "auto", device_map=self.device, ) - print("Done!") + logger.info("Done!") return model, tokenizer, pipe def _generate_synonyms(self, prompt_text: str) -> List[str]: diff --git a/datadreamer/prompt_generation/profanity_filter.py b/datadreamer/prompt_generation/profanity_filter.py new file mode 100644 index 0000000..7a9da63 --- /dev/null +++ b/datadreamer/prompt_generation/profanity_filter.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +import logging +import random +from typing import List, Optional, Tuple + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +from datadreamer.utils.bad_words import BAD_WORDS_LIST + +logger = logging.getLogger(__name__) + + +class ProfanityFilter: + """Class for filtering bad words from texts and checking if texts are safe. + + Attributes: + device (str): Device to run the language model on ('cuda' for GPU, 'cpu' for CPU). + use_lm (bool): Whether to use a language model for checking text safety. + seed (Optional[float]): Seed for randomization. + model (AutoModelForCausalLM): The pre-trained causal language model for checking text safety. + tokenizer (AutoTokenizer): The tokenizer for the pre-trained language model. 
+ + Methods: + set_seed(seed): Sets the random seed for consistent prompt generation. + _init_lang_model(): Initializes the language model and tokenizer. + _contains_bad_words(texts): Checks if a list of texts contain bad words. + _check_lm_safety(text): Checks if a text is safe using a language model. + is_safe(classes): Checks if a list of classes is safe. + release(empty_cuda_cache): Releases the model and optionally empties the CUDA cache. + """ + + LLM_PROMPT = """You are Qwen, created by Alibaba Cloud. You are a helpful assistant who classifies the classes as appropriate or inappropriate. Inappropriate classes are those that directly relate to drugs, hate, racism, harassment, nudity, sexual or offensive words. Here are inappropriate examples: +- 'ass', +- 'a**', +- 'bitch', +- 'pussy', +- and 'f**k'. + +Otherwise, the classes are considered appropriate. They can talk about people, characters, animals, nature, history, human conflicts, and so on. Some acceptable examples are: +- 'cat', +- 'angry barking dog', +- 'alien', +- 'dracula', +- 'war', +- 'soldier', +- 'pluto', +- 'sun', +- and 'mercury.' + +Respond 'inappropriate' if the classes are unacceptable, otherwise respond with 'appropriate'.""" + + def __init__( + self, + device: str = "cuda", + use_lm: bool = False, + seed: Optional[float] = 42, + ) -> None: + """Initializes the ProfanityFilter with parameters.""" + self.seed = seed + if seed is not None: + self.set_seed(seed) + self.device = device + self.use_lm = use_lm + if self.use_lm: + self.model, self.tokenizer = self._init_lang_model() + + @staticmethod + def set_seed(seed: int) -> None: + """Sets the random seed for consistent prompt generation. + + Args: + seed (int): The random seed. + """ + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]: + """Initializes the language model and tokenizer for prompt generation. 
+ + Returns: + tuple: The initialized language model and tokenizer. + """ + logger.info( + f"Initializing Qwen2.5-1.5B-Instruct language model on {self.device}..." + ) + model_name = "Qwen/Qwen2.5-1.5B-Instruct" + if self.device == "cpu": + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype="auto", + device_map="cpu", + low_cpu_mem_usage=True, + ) + else: + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16, + device_map=self.device, + ) + tokenizer = AutoTokenizer.from_pretrained(model_name) + return model, tokenizer + + def _contains_bad_words(self, texts: List[str]) -> bool: + """Checks if a list of texts contain bad words. + + Args: + texts (List[str]): List of texts to checks against bad words list. + + Returns: + bool: True if any of the texts contain bad words, False otherwise. + """ + return any(text.lower() in BAD_WORDS_LIST for text in texts) + + def _check_lm_safety(self, text: str) -> bool: + """Checks if a text is safe using a language model. + + Args: + text (str): Text to check for bad words. + + Returns: + bool: True if the text is safe, False otherwise. 
+ """ + if self.use_lm: + messages = [ + { + "role": "system", + "content": self.LLM_PROMPT, + }, + {"role": "user", "content": text}, + ] + processed_text = self.tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + model_inputs = self.tokenizer([processed_text], return_tensors="pt").to( + self.model.device + ) + with torch.no_grad(): + generated_ids = self.model.generate( + **model_inputs, + max_new_tokens=10, + do_sample=False, + top_k=None, + top_p=None, + temperature=None, + ) + generated_ids = [ + output_ids[len(input_ids) :] + for input_ids, output_ids in zip( + model_inputs.input_ids, generated_ids + ) + ] + response = self.tokenizer.batch_decode( + generated_ids, skip_special_tokens=True + )[0] + return "inappropriate" not in response.lower().strip() + return True + + def is_safe(self, classes: List[str]) -> bool: + """Checks if a list of classes is safe. + + Args: + classes (List[str]): List of classes to check for safety. + + Returns: + bool: True if the classes are safe, False otherwise. 
+ """ + logger.info(f"Profanity filter is checking classes: {classes}") + return not self._contains_bad_words(classes) and self._check_lm_safety( + ",".join(classes) + ) + + def release(self, empty_cuda_cache=False) -> None: + """Releases the model and optionally empties the CUDA cache.""" + if self.use_lm: + if self.device == "cuda": + self.model = self.model.to("cpu") + if empty_cuda_cache: + with torch.no_grad(): + torch.cuda.empty_cache() + + +if __name__ == "__main__": + # Example usage of the class + profanity_filter = ProfanityFilter(use_lm=True, device="cpu") + classes_1 = ["cat", "fish", "dog", "ass", "person", "soldier", "war"] + print(f"Are classes#1 {classes_1} safe: {profanity_filter.is_safe(classes_1)}") + classes_2 = ["cat", "fish", "dog", "person", "soldier", "war"] + print(f"Are classes#2 {classes_2} safe: {profanity_filter.is_safe(classes_2)}") + profanity_filter.release() diff --git a/datadreamer/prompt_generation/prompt_generator.py b/datadreamer/prompt_generation/prompt_generator.py index 825243c..50662ac 100644 --- a/datadreamer/prompt_generation/prompt_generator.py +++ b/datadreamer/prompt_generation/prompt_generator.py @@ -49,7 +49,7 @@ def __init__( self.quantization = quantization if quantization is not None else "none" @staticmethod - def set_seed(seed: int): + def set_seed(seed: int) -> None: """Sets the random seed for consistent prompt generation. 
Args: diff --git a/datadreamer/prompt_generation/qwen2_lm_prompt_generator.py b/datadreamer/prompt_generation/qwen2_lm_prompt_generator.py new file mode 100644 index 0000000..48a6f5b --- /dev/null +++ b/datadreamer/prompt_generation/qwen2_lm_prompt_generator.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +import logging +import re +from typing import List, Literal, Optional, Tuple + +import torch +from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + BitsAndBytesConfig, + Pipeline, + pipeline, +) + +from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator + +logger = logging.getLogger(__name__) + + +class Qwen2LMPromptGenerator(LMPromptGenerator): + """A language model-based prompt generator class, extending PromptGenerator. + + Attributes: + device (str): Device to run the language model on ('cuda' for GPU, 'cpu' for CPU). + model (AutoModelForCausalLM): The pre-trained causal language model for generating prompts. + tokenizer (AutoTokenizer): The tokenizer for the pre-trained language model. + pipeline (pipeline): The HuggingFace pipeline for generating text. + + Methods: + _init_lang_model(): Initializes the language model and tokenizer. + _remove_caption_sentences(text): Removes caption sentences from the generated prompt. + _create_lm_prompt_text(selected_objects): Creates a text prompt for the language model. + _postprocess_prompt(prompt): Post-processes the generated prompt. + generate_prompts_batch(prompt_texts_batch): Generates a batch of prompts using the language model. 
+ """ + + def __init__( + self, + class_names: List[str], + prompts_number: int = 10, + num_objects_range: Optional[List[int]] = None, + batch_size: int = 1, + seed: Optional[float] = 42, + device: str = "cuda", + quantization: Optional[Literal["none", "4bit"]] = "none", + ) -> None: + """Initializes the LMPromptGenerator with class names and other settings.""" + super().__init__( + class_names, + prompts_number, + num_objects_range, + batch_size, + seed, + device, + quantization, + ) + + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + """Initializes the language model, tokenizer and pipeline for prompt generation. + + Returns: + tuple: The initialized language model, tokenizer and pipeline. + """ + selected_dtype = "auto" + logger.info( + f"Initializing Qwen2.5-1.5B-Instruct language model on {self.device}..." + ) + if self.device == "cpu": + model = AutoModelForCausalLM.from_pretrained( + "Qwen/Qwen2.5-1.5B-Instruct", + torch_dtype="auto", + device_map="cpu", + low_cpu_mem_usage=True, + ) + else: + if self.quantization == "none": + logger.info("Loading FP16 language model...") + selected_dtype = torch.float16 + model = AutoModelForCausalLM.from_pretrained( + "Qwen/Qwen2.5-1.5B-Instruct", + torch_dtype=selected_dtype, + trust_remote_code=True, + device_map=self.device, + ) + else: + logger.info("Loading INT4 language model...") + # Create the BitsAndBytesConfig object with the dynamically constructed arguments + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + selected_dtype = torch.bfloat16 + + model = AutoModelForCausalLM.from_pretrained( + "Qwen/Qwen2.5-1.5B-Instruct", + quantization_config=bnb_config, + torch_dtype=selected_dtype, + device_map=self.device, + trust_remote_code=True, + ) + + tokenizer = AutoTokenizer.from_pretrained( + "Qwen/Qwen2.5-1.5B-Instruct", padding_side="left" + ) + pipe = pipeline( + "text-generation", + model=model, + 
tokenizer=tokenizer, + torch_dtype=selected_dtype, + device_map=self.device, + batch_size=self.batch_size, + ) + return model, tokenizer, pipe + + def _remove_caption_sentences(self, text: str) -> str: + """Removes caption sentences from the generated prompt. + + Args: + text (str): The generated prompt text. + + Returns: + str: The cleaned prompt text. + """ + pattern = re.compile(r"\s*Caption reads: [^\.!?]*[\.\!?]", re.IGNORECASE) + # Replace the matched sentences with an empty string + cleaned_text = re.sub(pattern, "", text) + return cleaned_text + + def _create_lm_prompt_text(self, selected_objects: List[str]) -> str: + """Creates a language model text prompt based on selected objects. + + Args: + selected_objects (List[str]): Objects to include in the prompt. + + Returns: + str: A text prompt for the language model. + """ + return f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a chatbot who describes content of images!<|im_end|>\n<|im_start|>user\nGenerate a short and concise caption for an image. The caption must begin with this template: 'A photo of {', '.join(selected_objects)}'. The objects within the scene interact in a meaningful way. Complete the caption with a short scene description.<|im_end|>\n<|im_start|>assistant\n" + + def _postprocess_prompt(self, prompt: str) -> str: + """Post-processes the generated prompt. + + Args: + prompt (str): The generated prompt. + + Returns: + str: The post-processed prompt. 
+ """ + instructional_pattern = r"<\|im_start\|>system\n.*?<\|im_end\|>\n<\|im_start\|>user\n.*?<\|im_end\|>\n<\|im_start\|>assistant\n" + # Remove the instructional text to isolate the caption + prompt = ( + re.sub(instructional_pattern, "", prompt).replace('"', "").replace("'", "") + ) + prompt = self._remove_caption_sentences( + self._remove_incomplete_sentence(prompt) + ) + return prompt + + def generate_prompts_batch(self, prompt_texts_batch: List[str]) -> List[str]: + """Generates a list of prompts using the language model. + + Args: + prompt_texts_batch (List[str]): List of text prompts for the language model. + + Returns: + List[str]: List of generated prompts. + """ + sequences = self.pipeline(prompt_texts_batch, max_new_tokens=70) + decoded_prompts = [ + self._postprocess_prompt(sequence[0]["generated_text"]) + for sequence in sequences + ] + + return decoded_prompts + + +if __name__ == "__main__": + # Example usage of the class + object_names = ["aeroplane", "bicycle", "bird", "boat", "city"] + prompt_generator = Qwen2LMPromptGenerator( + class_names=object_names, prompts_number=5, device="cpu" + ) + generated_prompts = prompt_generator.generate_prompts() + for prompt in generated_prompts: + print(prompt) diff --git a/datadreamer/prompt_generation/synonym_generator.py b/datadreamer/prompt_generation/synonym_generator.py index ec3f306..b5d338f 100644 --- a/datadreamer/prompt_generation/synonym_generator.py +++ b/datadreamer/prompt_generation/synonym_generator.py @@ -1,11 +1,14 @@ from __future__ import annotations import json +import logging from abc import ABC, abstractmethod -from typing import List, Optional +from typing import Dict, List, Optional from tqdm import tqdm +logger = logging.getLogger(__name__) + # Abstract base class for synonym generation class SynonymGenerator(ABC): @@ -38,7 +41,7 @@ def __init__( self.seed = seed self.device = device - def generate_synonyms_for_list(self, words: List[str]) -> dict: + def 
generate_synonyms_for_list(self, words: List[str]) -> Dict: """Generates synonyms for a list of words and returns them in a dictionary. Args: @@ -51,10 +54,9 @@ def generate_synonyms_for_list(self, words: List[str]) -> dict: for word in tqdm(words, desc="Generating synonyms"): synonyms = self.generate_synonyms(word) synonyms_dict[word] = synonyms - print("Synonyms generated") return synonyms_dict - def save_synonyms(self, synonyms, save_path: str) -> None: + def save_synonyms(self, synonyms: Dict, save_path: str) -> None: """Saves the generated synonyms to a JSON file. Args: diff --git a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py index 78238e7..9e939a7 100644 --- a/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py +++ b/datadreamer/prompt_generation/tinyllama_lm_prompt_generator.py @@ -1,13 +1,16 @@ from __future__ import annotations +import logging import re -from typing import List, Literal, Optional +from typing import List, Literal, Optional, Tuple import torch from transformers import AutoModelForCausalLM, AutoTokenizer, Pipeline, pipeline from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator +logger = logging.getLogger(__name__) + class TinyLlamaLMPromptGenerator(LMPromptGenerator): """A language model-based prompt generator class, extending PromptGenerator. @@ -47,14 +50,14 @@ def __init__( quantization, ) - def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: + def _init_lang_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer, Pipeline]: """Initializes the language model, tokenizer and pipeline for prompt generation. Returns: tuple: The initialized language model, tokenizer and pipeline. 
""" + logger.info(f"Initializing TinyLlama-1.1B language model on {self.device}...") if self.device == "cpu": - print("Loading language model on CPU...") model = AutoModelForCausalLM.from_pretrained( "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype="auto", @@ -62,7 +65,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin low_cpu_mem_usage=True, ) else: - print("Loading language model on GPU...") model = AutoModelForCausalLM.from_pretrained( "TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.float16, @@ -82,7 +84,6 @@ def _init_lang_model(self) -> tuple[AutoModelForCausalLM, AutoTokenizer, Pipelin device_map=self.device, batch_size=self.batch_size, ) - print("Done!") return model, tokenizer, pipe def _remove_caption_sentences(self, text: str) -> str: diff --git a/datadreamer/utils/bad_words.py b/datadreamer/utils/bad_words.py new file mode 100644 index 0000000..532b19f --- /dev/null +++ b/datadreamer/utils/bad_words.py @@ -0,0 +1,961 @@ +# Source of this list: https://github.com/coffee-and-fun/google-profanity-words/blob/main/data/en.txt +BAD_WORDS_LIST = [ + "2 girls 1 cup", + "2g1c", + "4r5e", + "5h1t", + "5hit", + "a55", + "a_s_s", + "acrotomophilia", + "alabama hot pocket", + "alaskan pipeline", + "anal", + "anilingus", + "anus", + "apeshit", + "ar5e", + "arrse", + "arse", + "arsehole", + "ass", + "ass-fucker", + "ass-hat", + "ass-pirate", + "assbag", + "assbandit", + "assbanger", + "assbite", + "assclown", + "asscock", + "asscracker", + "asses", + "assface", + "assfucker", + "assfukka", + "assgoblin", + "asshat", + "asshead", + "asshole", + "assholes", + "asshopper", + "assjacker", + "asslick", + "asslicker", + "assmonkey", + "assmunch", + "assmuncher", + "asspirate", + "assshole", + "asssucker", + "asswad", + "asswhole", + "asswipe", + "auto erotic", + "autoerotic", + "b!tch", + "b00bs", + "b17ch", + "b1tch", + "babeland", + "baby batter", + "baby juice", + "ball gag", + "ball gravy", + "ball kicking", + "ball licking", + 
"ball sack", + "ball sucking", + "ballbag", + "balls", + "ballsack", + "bampot", + "bangbros", + "bareback", + "barely legal", + "barenaked", + "bastard", + "bastardo", + "bastinado", + "bbw", + "bdsm", + "beaner", + "beaners", + "beastial", + "beastiality", + "beastility", + "beaver cleaver", + "beaver lips", + "bellend", + "bestial", + "bestiality", + "bi+ch", + "biatch", + "big black", + "big breasts", + "big knockers", + "big tits", + "bimbos", + "birdlock", + "bitch", + "bitcher", + "bitchers", + "bitches", + "bitchin", + "bitching", + "black cock", + "blonde action", + "blonde on blonde action", + "bloody", + "blow job", + "blow your load", + "blowjob", + "blowjobs", + "blue waffle", + "blumpkin", + "boiolas", + "bollock", + "bollocks", + "bollok", + "bollox", + "bondage", + "boner", + "boob", + "boobie", + "boobs", + "booobs", + "boooobs", + "booooobs", + "booooooobs", + "booty call", + "breasts", + "brown showers", + "brunette action", + "buceta", + "bugger", + "bukkake", + "bulldyke", + "bullet vibe", + "bullshit", + "bum", + "bung hole", + "bunghole", + "bunny fucker", + "busty", + "butt", + "butt-pirate", + "buttcheeks", + "butthole", + "buttmunch", + "buttplug", + "c0ck", + "c0cksucker", + "camel toe", + "camgirl", + "camslut", + "camwhore", + "carpet muncher", + "carpetmuncher", + "cawk", + "chinc", + "chink", + "choad", + "chocolate rosebuds", + "chode", + "cipa", + "circlejerk", + "cl1t", + "cleveland steamer", + "clit", + "clitface", + "clitoris", + "clits", + "clover clamps", + "clusterfuck", + "cnut", + "cock", + "cock-sucker", + "cockbite", + "cockburger", + "cockface", + "cockhead", + "cockjockey", + "cockknoker", + "cockmaster", + "cockmongler", + "cockmongruel", + "cockmonkey", + "cockmunch", + "cockmuncher", + "cocknose", + "cocknugget", + "cocks", + "cockshit", + "cocksmith", + "cocksmoker", + "cocksuck", + "cocksuck", + "cocksucked", + "cocksucked", + "cocksucker", + "cocksucking", + "cocksucks", + "cocksuka", + "cocksukka", + "cok", + 
"cokmuncher", + "coksucka", + "coochie", + "coochy", + "coon", + "coons", + "cooter", + "coprolagnia", + "coprophilia", + "cornhole", + "cox", + "crap", + "creampie", + "cum", + "cumbubble", + "cumdumpster", + "cumguzzler", + "cumjockey", + "cummer", + "cumming", + "cums", + "cumshot", + "cumslut", + "cumtart", + "cunilingus", + "cunillingus", + "cunnie", + "cunnilingus", + "cunt", + "cuntface", + "cunthole", + "cuntlick", + "cuntlick", + "cuntlicker", + "cuntlicker", + "cuntlicking", + "cuntlicking", + "cuntrag", + "cunts", + "cyalis", + "cyberfuc", + "cyberfuck", + "cyberfucked", + "cyberfucker", + "cyberfuckers", + "cyberfucking", + "d1ck", + "dammit", + "damn", + "darkie", + "date rape", + "daterape", + "deep throat", + "deepthroat", + "dendrophilia", + "dick", + "dickbag", + "dickbeater", + "dickface", + "dickhead", + "dickhole", + "dickjuice", + "dickmilk", + "dickmonger", + "dickslap", + "dicksucker", + "dickwad", + "dickweasel", + "dickweed", + "dickwod", + "dike", + "dildo", + "dildos", + "dingleberries", + "dingleberry", + "dink", + "dinks", + "dipshit", + "dirsa", + "dirty pillows", + "dirty sanchez", + "dlck", + "dog style", + "dog-fucker", + "doggie style", + "doggiestyle", + "doggin", + "dogging", + "doggy style", + "doggystyle", + "dolcett", + "domination", + "dominatrix", + "dommes", + "donkey punch", + "donkeyribber", + "doochbag", + "dookie", + "doosh", + "double dong", + "double penetration", + "douche", + "douchebag", + "dp action", + "dry hump", + "duche", + "dumbshit", + "dumshit", + "dvda", + "dyke", + "eat my ass", + "ecchi", + "ejaculate", + "ejaculated", + "ejaculates", + "ejaculating", + "ejaculatings", + "ejaculation", + "ejakulate", + "erotic", + "erotism", + "escort", + "eunuch", + "f u c k", + "f u c k e r", + "f4nny", + "f_u_c_k", + "fag", + "fagbag", + "fagg", + "fagging", + "faggit", + "faggitt", + "faggot", + "faggs", + "fagot", + "fagots", + "fags", + "fagtard", + "fanny", + "fannyflaps", + "fannyfucker", + "fanyy", + "fart", + 
"farted", + "farting", + "farty", + "fatass", + "fcuk", + "fcuker", + "fcuking", + "fecal", + "feck", + "fecker", + "felatio", + "felch", + "felching", + "fellate", + "fellatio", + "feltch", + "female squirting", + "femdom", + "figging", + "fingerbang", + "fingerfuck", + "fingerfucked", + "fingerfucker", + "fingerfuckers", + "fingerfucking", + "fingerfucks", + "fingering", + "fistfuck", + "fistfucked", + "fistfucker", + "fistfuckers", + "fistfucking", + "fistfuckings", + "fistfucks", + "fisting", + "flamer", + "flange", + "fook", + "fooker", + "foot fetish", + "footjob", + "frotting", + "fuck", + "fuck buttons", + "fucka", + "fucked", + "fucker", + "fuckers", + "fuckhead", + "fuckheads", + "fuckin", + "fucking", + "fuckings", + "fuckingshitmotherfucker", + "fuckme", + "fucks", + "fucktards", + "fuckwhit", + "fuckwit", + "fudge packer", + "fudgepacker", + "fuk", + "fuker", + "fukker", + "fukkin", + "fuks", + "fukwhit", + "fukwit", + "futanari", + "fux", + "fux0r", + "g-spot", + "gang bang", + "gangbang", + "gangbanged", + "gangbanged", + "gangbangs", + "gay sex", + "gayass", + "gaybob", + "gaydo", + "gaylord", + "gaysex", + "gaytard", + "gaywad", + "genitals", + "giant cock", + "girl on", + "girl on top", + "girls gone wild", + "goatcx", + "goatse", + "god damn", + "god-dam", + "god-damned", + "goddamn", + "goddamned", + "gokkun", + "golden shower", + "goo girl", + "gooch", + "goodpoop", + "gook", + "goregasm", + "gringo", + "grope", + "group sex", + "guido", + "guro", + "hand job", + "handjob", + "hard core", + "hardcore", + "hardcoresex", + "heeb", + "hell", + "hentai", + "heshe", + "ho", + "hoar", + "hoare", + "hoe", + "hoer", + "homo", + "homoerotic", + "honkey", + "honky", + "hooker", + "hore", + "horniest", + "horny", + "hot carl", + "hot chick", + "hotsex", + "how to kill", + "how to murder", + "huge fat", + "humping", + "incest", + "intercourse", + "jack off", + "jack-off", + "jackass", + "jackoff", + "jail bait", + "jailbait", + "jap", + "jelly donut", + 
"jerk off", + "jerk-off", + "jigaboo", + "jiggaboo", + "jiggerboo", + "jism", + "jiz", + "jiz", + "jizm", + "jizm", + "jizz", + "juggs", + "kawk", + "kike", + "kinbaku", + "kinkster", + "kinky", + "kiunt", + "knob", + "knobbing", + "knobead", + "knobed", + "knobend", + "knobhead", + "knobjocky", + "knobjokey", + "kock", + "kondum", + "kondums", + "kooch", + "kootch", + "kum", + "kumer", + "kummer", + "kumming", + "kums", + "kunilingus", + "kunt", + "kyke", + "l3i+ch", + "l3itch", + "labia", + "leather restraint", + "leather straight jacket", + "lemon party", + "lesbo", + "lezzie", + "lmfao", + "lolita", + "lovemaking", + "lust", + "lusting", + "m0f0", + "m0fo", + "m45terbate", + "ma5terb8", + "ma5terbate", + "make me come", + "male squirting", + "masochist", + "master-bate", + "masterb8", + "masterbat*", + "masterbat3", + "masterbate", + "masterbation", + "masterbations", + "masturbate", + "menage a trois", + "milf", + "minge", + "missionary position", + "mo-fo", + "mof0", + "mofo", + "mothafuck", + "mothafucka", + "mothafuckas", + "mothafuckaz", + "mothafucked", + "mothafucker", + "mothafuckers", + "mothafuckin", + "mothafucking", + "mothafuckings", + "mothafucks", + "mother fucker", + "motherfuck", + "motherfucked", + "motherfucker", + "motherfuckers", + "motherfuckin", + "motherfucking", + "motherfuckings", + "motherfuckka", + "motherfucks", + "mound of venus", + "mr hands", + "muff", + "muff diver", + "muffdiver", + "muffdiving", + "mutha", + "muthafecker", + "muthafuckker", + "muther", + "mutherfucker", + "n1gga", + "n1gger", + "nambla", + "nawashi", + "nazi", + "negro", + "neonazi", + "nig nog", + "nigg3r", + "nigg4h", + "nigga", + "niggah", + "niggas", + "niggaz", + "nigger", + "niggers", + "niglet", + "nimphomania", + "nipple", + "nipples", + "nob", + "nob jokey", + "nobhead", + "nobjocky", + "nobjokey", + "nsfw images", + "nude", + "nudity", + "numbnuts", + "nutsack", + "nympho", + "nymphomania", + "octopussy", + "omorashi", + "one cup two girls", + "one 
guy one jar", + "orgasim", + "orgasim", + "orgasims", + "orgasm", + "orgasms", + "orgy", + "p0rn", + "paedophile", + "paki", + "panooch", + "panties", + "panty", + "pawn", + "pecker", + "peckerhead", + "pedobear", + "pedophile", + "pegging", + "penis", + "penisfucker", + "phone sex", + "phonesex", + "phuck", + "phuk", + "phuked", + "phuking", + "phukked", + "phukking", + "phuks", + "phuq", + "piece of shit", + "pigfucker", + "pimpis", + "pis", + "pises", + "pisin", + "pising", + "pisof", + "piss", + "piss pig", + "pissed", + "pisser", + "pissers", + "pisses", + "pissflap", + "pissflaps", + "pissin", + "pissin", + "pissing", + "pissoff", + "pissoff", + "pisspig", + "playboy", + "pleasure chest", + "pole smoker", + "polesmoker", + "pollock", + "ponyplay", + "poo", + "poof", + "poon", + "poonani", + "poonany", + "poontang", + "poop", + "poop chute", + "poopchute", + "porn", + "porno", + "pornography", + "pornos", + "prick", + "pricks", + "prince albert piercing", + "pron", + "pthc", + "pube", + "pubes", + "punanny", + "punany", + "punta", + "pusies", + "pusse", + "pussi", + "pussies", + "pussy", + "pussylicking", + "pussys", + "pusy", + "puto", + "queaf", + "queef", + "queerbait", + "queerhole", + "quim", + "raghead", + "raging boner", + "rape", + "raping", + "rapist", + "rectum", + "renob", + "retard", + "reverse cowgirl", + "rimjaw", + "rimjob", + "rimming", + "rosy palm", + "rosy palm and her 5 sisters", + "ruski", + "rusty trombone", + "s hit", + "s&m", + "s.o.b.", + "s_h_i_t", + "sadism", + "sadist", + "santorum", + "scat", + "schlong", + "scissoring", + "screwing", + "scroat", + "scrote", + "scrotum", + "semen", + "sex", + "sexo", + "sexy", + "sh!+", + "sh!t", + "sh1t", + "shag", + "shagger", + "shaggin", + "shagging", + "shaved beaver", + "shaved pussy", + "shemale", + "shi+", + "shibari", + "shit", + "shit-ass", + "shit-bag", + "shit-bagger", + "shit-brain", + "shit-breath", + "shit-cunt", + "shit-dick", + "shit-eating", + "shit-face", + "shit-faced", + 
"shit-fit", + "shit-head", + "shit-heel", + "shit-hole", + "shit-house", + "shit-load", + "shit-pot", + "shit-spitter", + "shit-stain", + "shitass", + "shitbag", + "shitbagger", + "shitblimp", + "shitbrain", + "shitbreath", + "shitcunt", + "shitdick", + "shite", + "shiteating", + "shited", + "shitey", + "shitface", + "shitfaced", + "shitfit", + "shitfuck", + "shitfull", + "shithead", + "shitheel", + "shithole", + "shithouse", + "shiting", + "shitings", + "shitload", + "shitpot", + "shits", + "shitspitter", + "shitstain", + "shitted", + "shitter", + "shitters", + "shittiest", + "shitting", + "shittings", + "shitty", + "shitty", + "shity", + "shiz", + "shiznit", + "shota", + "shrimping", + "skank", + "skeet", + "slanteye", + "slut", + "slutbag", + "sluts", + "smeg", + "smegma", + "smut", + "snatch", + "snowballing", + "sodomize", + "sodomy", + "son-of-a-bitch", + "spac", + "spic", + "spick", + "splooge", + "splooge moose", + "spooge", + "spread legs", + "spunk", + "strap on", + "strapon", + "strappado", + "strip club", + "style doggy", + "suck", + "sucks", + "suicide girls", + "sultry women", + "swastika", + "swinger", + "t1tt1e5", + "t1tties", + "tainted love", + "tard", + "taste my", + "tea bagging", + "teets", + "teez", + "testical", + "testicle", + "threesome", + "throating", + "thundercunt", + "tied up", + "tight white", + "tit", + "titfuck", + "tits", + "titt", + "tittie5", + "tittiefucker", + "titties", + "titty", + "tittyfuck", + "tittywank", + "titwank", + "tongue in a", + "topless", + "tosser", + "towelhead", + "tranny", + "tribadism", + "tub girl", + "tubgirl", + "turd", + "tushy", + "tw4t", + "twat", + "twathead", + "twatlips", + "twatty", + "twink", + "twinkie", + "two girls one cup", + "twunt", + "twunter", + "undressing", + "upskirt", + "urethra play", + "urophilia", + "v14gra", + "v1gra", + "va-j-j", + "vag", + "vagina", + "venus mound", + "viagra", + "vibrator", + "violet wand", + "vjayjay", + "vorarephilia", + "voyeur", + "vulva", + "w00se", + 
"wang", + "wank", + "wanker", + "wanky", + "wet dream", + "wetback", + "white power", + "whoar", + "whore", + "willies", + "willy", + "wrapping men", + "wrinkled starfish", + "xrated", + "xx", + "xxx", + "yaoi", + "yellow showers", + "yiffy", + "zoophilia", + "🖕", +] diff --git a/datadreamer/utils/base_converter.py b/datadreamer/utils/base_converter.py index 3d97199..40003ed 100644 --- a/datadreamer/utils/base_converter.py +++ b/datadreamer/utils/base_converter.py @@ -2,6 +2,7 @@ import json from abc import ABC, abstractmethod +from typing import Dict, List, Tuple import numpy as np @@ -13,47 +14,56 @@ def __init__(self, seed=42): np.random.seed(seed) @abstractmethod - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Converts a dataset into another format. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. - + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. 
""" pass @staticmethod - def read_annotations(annotation_path): + def read_annotations(annotation_path: str) -> Dict: """Reads annotations from a JSON file located at the specified path. Args: - - annotation_path (str): The path to the JSON file containing annotations. + annotation_path (str): The path to the JSON file containing annotations. Returns: - - dict: A dictionary containing the data loaded from the JSON file. + dict: A dictionary containing the data loaded from the JSON file. """ with open(annotation_path) as f: data = json.load(f) return data @staticmethod - def make_splits(images, split_ratios, shuffle=True): + def make_splits( + images: List[str], split_ratios: List[float], shuffle: bool = True + ) -> Tuple[List, List, List]: """Splits the list of images into training, validation, and test sets. Args: - - images (list of str): A list of image paths. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - shuffle (bool, optional): Whether to shuffle the list of images. Defaults to True. + images (list of str): A list of image paths. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + shuffle (bool, optional): Whether to shuffle the list of images. Defaults to True. Returns: - - list of str: A list of image paths for the training set. - - list of str: A list of image paths for the validation set. - - list of str: A list of image paths for the test set. + list of str: A list of image paths for the training set. + list of str: A list of image paths for the validation set. + list of str: A list of image paths for the test set. 
""" if shuffle: np.random.shuffle(images) diff --git a/datadreamer/utils/coco_converter.py b/datadreamer/utils/coco_converter.py index ba02d97..40d599a 100644 --- a/datadreamer/utils/coco_converter.py +++ b/datadreamer/utils/coco_converter.py @@ -1,13 +1,18 @@ from __future__ import annotations import json +import logging import os import shutil +from typing import Dict, List +import numpy as np from PIL import Image from datadreamer.utils.base_converter import BaseConverter +logger = logging.getLogger(__name__) + class COCOConverter(BaseConverter): """Class for converting a dataset to COCO format. @@ -28,41 +33,77 @@ class COCOConverter(BaseConverter): │ ├── labels.json """ - def __init__(self, seed=42): + def __init__(self, seed=42, is_instance_segmentation: bool = False): super().__init__(seed) - - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + self.is_instance_segmentation = is_instance_segmentation + + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Converts a dataset into a COCO format. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. 
+ copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ annotation_path = os.path.join(dataset_dir, "annotations.json") data = BaseConverter.read_annotations(annotation_path) - self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) + self.process_data( + data, + dataset_dir, + output_dir, + split_ratios, + keep_unlabeled_images, + copy_files, + ) - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def process_data( + self, + data: Dict, + image_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Processes the data by dividing it into training and validation sets, and saves the images and labels in COCO format. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. - + data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. 
""" images = list(data.keys()) images.remove("class_names") + empty_images = list(filter(lambda x: len(data[x]["labels"]) == 0, images)) + if keep_unlabeled_images and len(empty_images) > 0: + logger.warning( + f"{len(empty_images)} images with no annotations will be included in the dataset." + ) + elif not keep_unlabeled_images and len(empty_images) > 0: + logger.info( + f"{len(empty_images)} images with no annotations will be excluded from the dataset." + ) + for image in empty_images: + images.remove(image) + train_images, val_images, test_images = BaseConverter.make_splits( images, split_ratios ) @@ -98,19 +139,36 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru "height": image_height, } ) + masks = ( + annotation.get("masks") + if self.is_instance_segmentation + else [None] * len(annotation["boxes"]) + ) + + # Loop through boxes, labels, and masks, appending to annotations + for box, label, mask in zip( + annotation["boxes"], annotation["labels"], masks + ): + bbox = [box[0], box[1], box[2] - box[0], box[3] - box[1]] + segmentation = ( + np.array(mask).reshape(1, -1).tolist() + if mask is not None + else None + ) + area = (box[2] - box[0]) * (box[3] - box[1]) - for box, label in zip(annotation["boxes"], annotation["labels"]): annotations.append( { "id": annotation_id, "image_id": len(images_info), "category_id": label, - "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], - "segmentation": None, # [[box[0], box[1], box[2], box[1], box[2], box[3], box[0], box[3]]], # bbox mask - "area": (box[2] - box[0]) * (box[3] - box[1]), + "bbox": bbox, + "segmentation": segmentation, + "area": area, "iscrowd": 0, } ) + annotation_id += 1 if copy_files: @@ -126,14 +184,20 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru dataset_output_dir, images_info, annotations, data["class_names"] ) - def save_labels(self, dataset_output_dir, images_info, annotations, class_names): + def save_labels( + self, + 
dataset_output_dir: str, + images_info: List[Dict], + annotations: List[Dict], + class_names: List[str], + ) -> None: """Saves the labels to a JSON file. Args: - - dataset_output_dir (str): The directory where the labels should be saved. - - images_info (list of dict): A list of dictionaries containing image information. - - annotations (list of dict): A list of dictionaries containing annotation information. - - class_names (list of str): A list of class names. + dataset_output_dir (str): The directory where the labels should be saved. + images_info (list of dict): A list of dictionaries containing image information. + annotations (list of dict): A list of dictionaries containing annotation information. + class_names (list of str): A list of class names. No return value. """ diff --git a/datadreamer/utils/config.py b/datadreamer/utils/config.py index 2e87832..6227b61 100644 --- a/datadreamer/utils/config.py +++ b/datadreamer/utils/config.py @@ -1,9 +1,8 @@ from __future__ import annotations -from typing import Annotated, List, Literal - from luxonis_ml.utils import LuxonisConfig from pydantic import Field +from typing_extensions import Annotated, List, Literal class Config(LuxonisConfig): @@ -11,7 +10,7 @@ class Config(LuxonisConfig): save_dir: str = "generated_dataset" class_names: List[str] = ["bear", "bicycle", "bird", "person"] prompts_number: int = 10 - task: Literal["detection", "classification"] = "detection" + task: Literal["detection", "classification", "instance-segmentation"] = "detection" seed: int = 42 device: Literal["cuda", "cpu"] = "cuda" annotate_only: bool = False @@ -22,7 +21,7 @@ class Config(LuxonisConfig): List[float], Field(default=[0.8, 0.1, 0.1], min_length=3, max_length=3) ] = [0.8, 0.1, 0.1] # Prompt generation arguments - prompt_generator: Literal["simple", "lm", "tiny"] = "simple" + prompt_generator: Literal["simple", "lm", "tiny", "qwen2"] = "qwen2" synonym_generator: Literal["none", "llm", "wordnet"] = "none" num_objects_range: 
Annotated[ List[int], Field(default=[1, 3], min_length=2, max_length=2) @@ -37,10 +36,18 @@ class Config(LuxonisConfig): batch_size_image: int = 1 use_image_tester: bool = False image_tester_patience: int = 1 + # Profanity filter arguments + disable_lm_filter: bool = False # Annotation arguments - image_annotator: Literal["owlv2", "clip"] = "owlv2" + image_annotator: Literal["owlv2", "clip", "owlv2-slimsam"] = "owlv2" conf_threshold: float = 0.15 annotation_iou_threshold: float = 0.2 use_tta: bool = False annotator_size: Literal["base", "large"] = "base" batch_size_annotation: int = 1 + dataset_plugin: str = "" + loader_plugin: str = "" + dataset_name: str = "" + dataset_id: str = "" + # Dataset arguments + keep_unlabeled_images: bool = False diff --git a/datadreamer/utils/convert_dataset.py b/datadreamer/utils/convert_dataset.py index 7b028e4..1bcea34 100644 --- a/datadreamer/utils/convert_dataset.py +++ b/datadreamer/utils/convert_dataset.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +from typing import List, Optional from datadreamer.utils import ( COCOConverter, @@ -11,20 +12,57 @@ def convert_dataset( - input_dir, output_dir, dataset_format, split_ratios, copy_files=True, seed=42 -): + input_dir: str, + output_dir: str, + dataset_format: str, + split_ratios: List[float], + dataset_plugin: Optional[str] = None, + dataset_name: Optional[str] = None, + is_instance_segmentation: bool = False, + keep_unlabeled_images: bool = False, + copy_files: bool = True, + seed: int = 42, +) -> None: + """Converts a dataset from one format to another. + + Args: + input_dir (str): Directory containing the images and annotations. + output_dir (str): Directory where the processed dataset will be saved. + dataset_format (str): Format of the dataset. Can be 'yolo', 'coco', 'luxonis-dataset', or 'cls-single'. + split_ratios (list of float): List of ratios for train, val, and test splits. + dataset_plugin (str, optional): Plugin for Luxonis dataset.
Defaults to None. + dataset_name (str, optional): Name of the Luxonis dataset. Defaults to None. + is_instance_segmentation (bool, optional): Whether the dataset is for instance segmentation. Defaults to False. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the files to the output directory. Defaults to True. + seed (int, optional): Random seed. Defaults to 42. + + No return value. + """ + if dataset_format == "yolo": - converter = YOLOConverter(seed=seed) + converter = YOLOConverter( + seed=seed, is_instance_segmentation=is_instance_segmentation + ) elif dataset_format == "coco": - converter = COCOConverter(seed=seed) + converter = COCOConverter( + seed=seed, is_instance_segmentation=is_instance_segmentation + ) elif dataset_format == "luxonis-dataset": - converter = LuxonisDatasetConverter(seed=seed) + converter = LuxonisDatasetConverter( + dataset_plugin=dataset_plugin, + dataset_name=dataset_name, + seed=seed, + is_instance_segmentation=is_instance_segmentation, + ) elif dataset_format == "cls-single": converter = SingleLabelClsConverter(seed=seed) else: raise ValueError(f"Invalid dataset format: {dataset_format}") - converter.convert(input_dir, output_dir, split_ratios, copy_files) + converter.convert( + input_dir, output_dir, split_ratios, keep_unlabeled_images, copy_files + ) def main(): @@ -52,6 +90,29 @@ def main(): default=[0.8, 0.1, 0.1], help="Train-validation-test split ratios (default: 0.8, 0.1, 0.1).", ) + parser.add_argument( + "--dataset_plugin", + type=str, + default=None, + help="Dataset plugin to use for luxonis-dataset format.", + ) + parser.add_argument( + "--dataset_name", + type=str, + help="Name of the dataset to create if dataset_plugin is used", + ) + parser.add_argument( + "--is_instance_segmentation", + default=None, + action="store_true", + help="Whether the dataset is for instance segmentation.", + ) + parser.add_argument( + 
"--keep_unlabeled_images", + default=None, + action="store_true", + help="Whether to keep images without any annotations", + ) parser.add_argument( "--copy_files", type=bool, @@ -68,11 +129,16 @@ def main(): args = parser.parse_args() convert_dataset( - args.input_dir, - args.output_dir, - args.dataset_format, - args.split_ratios, - args.copy_files, + input_dir=args.input_dir, + output_dir=args.output_dir, + dataset_format=args.dataset_format, + split_ratios=args.split_ratios, + dataset_plugin=args.dataset_plugin, + dataset_name=args.dataset_name, + is_instance_segmentation=args.is_instance_segmentation, + keep_unlabeled_images=args.keep_unlabeled_images, + copy_files=args.copy_files, + seed=args.seed, ) diff --git a/datadreamer/utils/dataset_utils.py b/datadreamer/utils/dataset_utils.py index a396ae0..a1c5971 100644 --- a/datadreamer/utils/dataset_utils.py +++ b/datadreamer/utils/dataset_utils.py @@ -6,10 +6,27 @@ def save_annotations_to_json( image_paths, labels_list, boxes_list=None, + masks_list=None, class_names=None, save_dir=None, file_name="annotations.json", -): +) -> None: + """Saves annotations to a JSON file. + + Args: + image_paths (list): List of image paths. + labels_list (list): List of labels. + boxes_list (list, optional): List of bounding boxes. Defaults to None. + masks_list (list, optional): List of instance segmentation masks. Defaults to None. + class_names (list, optional): List of class names. Defaults to None. + save_dir (str, optional): Directory to save the JSON file. Defaults to None. + file_name (str, optional): Name of the JSON file. Defaults to 'annotations.json'. + + No return value. 
+ """ + if save_dir is None: + save_dir = os.getcwd() + annotations = {} for i in range(len(image_paths)): # for image_path, bboxes, labels in zip(image_paths, boxes_list, labels_list): @@ -23,6 +40,10 @@ def save_annotations_to_json( bboxes = boxes_list[i] annotations[image_name]["boxes"] = bboxes.tolist() + if masks_list is not None: + masks = masks_list[i] + annotations[image_name]["masks"] = masks + annotations["class_names"] = class_names # Save to JSON file diff --git a/datadreamer/utils/luxonis_dataset_converter.py b/datadreamer/utils/luxonis_dataset_converter.py index d9cb5a5..fec3d36 100644 --- a/datadreamer/utils/luxonis_dataset_converter.py +++ b/datadreamer/utils/luxonis_dataset_converter.py @@ -1,36 +1,83 @@ from __future__ import annotations +import logging import os +from typing import Dict, List -from luxonis_ml.data import LuxonisDataset +from luxonis_ml.data import DATASETS_REGISTRY, LuxonisDataset from luxonis_ml.data.utils.enums import BucketStorage from PIL import Image from datadreamer.utils import BaseConverter +logger = logging.getLogger(__name__) + class LuxonisDatasetConverter(BaseConverter): """Class for converting a dataset to LuxonisDataset format.""" - def __init__(self, seed=42): + def __init__( + self, + dataset_plugin: str = None, + dataset_name: str = None, + seed: int = 42, + is_instance_segmentation: bool = False, + ): super().__init__(seed) - - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + self.is_instance_segmentation = is_instance_segmentation + self.dataset_plugin = dataset_plugin + self.dataset_name = dataset_name + + if self.is_instance_segmentation: + logger.warning( + "Instance segmentation will be treated as semantic segmentation until the support for instance segmentation is added to Luxonis-ml." 
+ ) + + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Converts a dataset into a LuxonisDataset format. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ annotation_path = os.path.join(dataset_dir, "annotations.json") data = BaseConverter.read_annotations(annotation_path) - self.process_data(data, dataset_dir, output_dir, split_ratios) + self.process_data( + data, dataset_dir, output_dir, split_ratios, keep_unlabeled_images + ) + + def process_data( + self, + data: Dict, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + ) -> None: + """Processes the data into LuxonisDataset format. - def process_data(self, data, dataset_dir, output_dir, split_ratios): + Args: + data (dict): The data to process. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. 
+ + No return value. + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) class_names = data["class_names"] image_paths = list(data.keys()) image_paths.remove("class_names") @@ -42,42 +89,97 @@ def dataset_generator(): image_full_path = os.path.join(dataset_dir, image_path) width, height = Image.open(image_full_path).size labels = data[image_path]["labels"] + + if len(labels) == 0 and keep_unlabeled_images: + logger.warning( + f"Image {image_path} has no annotations. Training on empty images with `luxonis-train` will result in an error." + ) + yield { + "file": image_full_path, + } + for label in labels: yield { "file": image_full_path, - "class": class_names[label], - "type": "classification", - "value": True, + "annotation": { + "class": class_names[label], + "type": "classification", + # "value": True, + }, } + if "masks" in data[image_path]: # polyline format + masks = data[image_path]["masks"] + for mask, label in zip(masks, labels): + poly = [] + poly += [ + (point[0] / width, point[1] / height) for point in mask + ] + yield { + "file": image_full_path, + "annotation": { + "type": "polyline", + "class": class_names[label], + "points": poly, # masks, + }, + } + if "boxes" in data[image_path]: boxes = data[image_path]["boxes"] - for box in boxes: - x, y, w, h = box[0], box[1], box[2] - box[0], box[3] - box[1] + for box, label in zip(boxes, labels): + x, y = max(0, box[0] / width), max(0, box[1] / height) + w = min(box[2] / width - x, 1 - x) + h = min(box[3] / height - y, 1 - y) yield { "file": image_full_path, - "class": class_names[label], - "type": "box", - "value": (x / width, y / height, w / width, h / height), + "annotation": { + "class": class_names[label], + "type": "boundingbox", + "x": x, + "y": y, + "w": w, + "h": h, + }, } - dataset_name = os.path.basename(output_dir) + dataset_name = ( + os.path.basename(output_dir) + if self.dataset_name is None + else self.dataset_name + ) if LuxonisDataset.exists(dataset_name): dataset = 
LuxonisDataset(dataset_name) dataset.delete_dataset() + # if dataset_plugin is set, use that + if self.dataset_plugin: + if "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: + logger.info(f"Using {self.dataset_plugin} dataset") + dataset_constructor = DATASETS_REGISTRY.get(self.dataset_plugin) + dataset = dataset_constructor(dataset_name) + else: + raise ValueError( + "GOOGLE_APPLICATION_CREDENTIALS environment variable is not set for using the dataset plugin." + ) # if LUXONISML_BUCKET and GOOGLE_APPLICATION_CREDENTIALS are set, use GCS bucket - if ( + elif ( "LUXONISML_BUCKET" in os.environ and "GOOGLE_APPLICATION_CREDENTIALS" in os.environ ): + logger.info("Using GCS bucket") dataset = LuxonisDataset(dataset_name, bucket_storage=BucketStorage.GCS) - print("Using GCS bucket") else: + logger.info("Using local dataset") dataset = LuxonisDataset(dataset_name) - print("Using local dataset") - dataset.set_classes(class_names) - - dataset.add(dataset_generator) - dataset.make_splits(split_ratios) + dataset.add(dataset_generator()) + + if not keep_unlabeled_images: + n_empty_images = len( + list(filter(lambda x: len(data[x]["labels"]) == 0, image_paths)) + ) + if n_empty_images > 0: + logger.info( + f"Removed {n_empty_images} empty images with no annotations from the dataset." + ) + dataset.make_splits(tuple(split_ratios)) diff --git a/datadreamer/utils/merge_raw_datasets.py b/datadreamer/utils/merge_raw_datasets.py index 47c1dc0..c6eb64e 100644 --- a/datadreamer/utils/merge_raw_datasets.py +++ b/datadreamer/utils/merge_raw_datasets.py @@ -2,16 +2,33 @@ import argparse import json +import logging import os import shutil +from typing import List +logger = logging.getLogger(__name__) -def merge_datasets(input_dirs, output_dir, copy_files=True): + +def merge_datasets( + input_dirs: List[str], output_dir: str, copy_files: bool = True +) -> None: + """Merges multiple raw datasets into a single dataset. 
+ + Args: + input_dirs (List[str]): A list of input directories containing raw datasets. + output_dir (str): The output directory where the merged dataset will be saved. + copy_files (bool, optional): Whether to copy the files from the input directories + to the output directory. Defaults to True. + + No return value. + """ + # Check if all input directories exist config_tasks = [] config_classes = [] random_seeds = [] for input_dir in input_dirs: - with open(os.path.join(input_dir, "generation_args.json")) as f: + with open(os.path.join(input_dir, "generation_args.yaml")) as f: generation_args = json.load(f) config_tasks.append(generation_args["task"]) config_classes.append(generation_args["class_names"]) @@ -29,7 +46,7 @@ def merge_datasets(input_dirs, output_dir, copy_files=True): raise ValueError("All datasets must have different random seeds") # Create output directory - print(f"Output directory: {output_dir}") + logger.info(f"Output directory: {output_dir}") if os.path.exists(output_dir): shutil.rmtree(output_dir) os.makedirs(output_dir) @@ -45,12 +62,12 @@ def merge_datasets(input_dirs, output_dir, copy_files=True): if copy_files: shutil.copy( os.path.join(input_dir, "generation_args.yaml"), - os.path.join(output_dir, f"generation_args_{i}.json"), + os.path.join(output_dir, f"generation_args_{i}.yaml"), ) else: shutil.move( os.path.join(input_dir, "generation_args.yaml"), - os.path.join(output_dir, f"generation_args_{i}.json"), + os.path.join(output_dir, f"generation_args_{i}.yaml"), ) # Copy or move images diff --git a/datadreamer/utils/nms.py b/datadreamer/utils/nms.py index 530707c..f0f29e4 100644 --- a/datadreamer/utils/nms.py +++ b/datadreamer/utils/nms.py @@ -4,8 +4,10 @@ # https://github.com/ultralytics/yolov5/blob/master/utils/general.py from __future__ import annotations +import logging import os import time +from typing import List import cv2 import numpy as np @@ -22,6 +24,8 @@ ) # prevent OpenCV from multithreading (incompatible with PyTorch 
DataLoader) os.environ["NUMEXPR_MAX_THREADS"] = str(min(os.cpu_count(), 8)) # NumExpr max threads +logger = logging.getLogger(__name__) + def xywh2xyxy(x): """Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 @@ -42,7 +46,7 @@ def non_max_suppression( agnostic=False, multi_label=False, max_det=300, -): +) -> List[np.ndarray]: """Runs Non-Maximum Suppression (NMS) on inference results. This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775 Args: @@ -131,7 +135,7 @@ def non_max_suppression( output[img_idx] = x[keep_box_idx] if (time.time() - tik) > time_limit: - print(f"WARNING: NMS cost time exceed the limited {time_limit}s.") + logger.warning(f"WARNING: NMS cost time exceed the limited {time_limit}s.") break # time limit exceeded return output diff --git a/datadreamer/utils/single_label_cls_converter.py b/datadreamer/utils/single_label_cls_converter.py index e5515d5..daa3bd8 100644 --- a/datadreamer/utils/single_label_cls_converter.py +++ b/datadreamer/utils/single_label_cls_converter.py @@ -1,10 +1,14 @@ from __future__ import annotations +import logging import os import shutil +from typing import Dict, List from datadreamer.utils import BaseConverter +logger = logging.getLogger(__name__) + class SingleLabelClsConverter(BaseConverter): """Class for converting a dataset for single-label classification task. @@ -29,17 +33,25 @@ class SingleLabelClsConverter(BaseConverter): │ ├── class_2 """ - def __init__(self, seed=42): + def __init__(self, seed: int = 42): super().__init__(seed) - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Converts a dataset into a format suitable for single-label classification. 
Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -47,16 +59,23 @@ def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): data = BaseConverter.read_annotations(annotation_path) self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def process_data( + self, + data: Dict, + image_dir: str, + output_dir: str, + split_ratios: List[float], + copy_files: bool = True, + ) -> None: """Processes the data by removing images with multiple labels, then dividing it into training and validation sets, and saves the images with single labels. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. 
+ data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ @@ -64,12 +83,12 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru class_names = data["class_names"] images.remove("class_names") - print(f"Number of images: {len(images)}") + logger.info(f"Number of images: {len(images)}") # Remove images with multiple labels single_label_images = [img for img in images if len(data[img]["labels"]) == 1] - print(f"Number of images with single label: {len(single_label_images)}") + logger.info(f"Number of images with single label: {len(single_label_images)}") # Split the data into training, validation, and test sets train_images, val_images, test_images = BaseConverter.make_splits( diff --git a/datadreamer/utils/yolo_converter.py b/datadreamer/utils/yolo_converter.py index 36452da..e4ad15a 100644 --- a/datadreamer/utils/yolo_converter.py +++ b/datadreamer/utils/yolo_converter.py @@ -1,12 +1,17 @@ from __future__ import annotations +import logging import os import shutil +from typing import Dict, List +import numpy as np from PIL import Image from datadreamer.utils import BaseConverter +logger = logging.getLogger(__name__) + class YOLOConverter(BaseConverter): """Class for converting a dataset to YOLO format.
@@ -29,35 +34,53 @@ class YOLOConverter(BaseConverter): │ ├── labels """ - def __init__(self, seed=42): + def __init__(self, seed=42, is_instance_segmentation: bool = False): super().__init__(seed) - - def convert(self, dataset_dir, output_dir, split_ratios, copy_files=True): + self.is_instance_segmentation = is_instance_segmentation + + def convert( + self, + dataset_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ): """Converts a dataset into a format suitable for training with YOLO, including creating training and validation splits. Args: - - dataset_dir (str): The directory where the source dataset is located. - - output_dir (str): The directory where the processed dataset should be saved. - - split_ratios (list of float): The ratios to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. + dataset_dir (str): The directory where the source dataset is located. + output_dir (str): The directory where the processed dataset should be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. 
""" annotation_path = os.path.join(dataset_dir, "annotations.json") data = BaseConverter.read_annotations(annotation_path) - self.process_data(data, dataset_dir, output_dir, split_ratios, copy_files) + self.process_data( + data, + dataset_dir, + output_dir, + split_ratios, + keep_unlabeled_images, + copy_files, + ) - def convert_to_yolo_format(self, box, image_width, image_height): + def convert_to_yolo_format( + self, box: List[float], image_width: int, image_height: int + ) -> List[float]: """Converts bounding box coordinates to YOLO format. Args: - - box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. - - image_width (int): The width of the image. - - image_height (int): The height of the image. + box (list of float): A list containing the bounding box coordinates [x_min, y_min, x_max, y_max]. + image_width (int): The width of the image. + image_height (int): The height of the image. Returns: - - list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. + list of float: A list containing the bounding box in YOLO format [x_center, y_center, width, height]. """ x_center = (box[0] + box[2]) / 2 / image_width y_center = (box[1] + box[3]) / 2 / image_height @@ -65,23 +88,58 @@ def convert_to_yolo_format(self, box, image_width, image_height): height = (box[3] - box[1]) / image_height return [x_center, y_center, width, height] - def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=True): + def convert_masks_to_yolo_format( + self, masks: List[List[float]], w: int, h: int + ) -> List[float]: + """Converts masks to YOLO format. + + Args: + masks (list of list of float): A list containing the masks. + w (int): The width of the image. + h (int): The height of the image. + + Returns: + list of float: A list containing the masks in YOLO format. 
""" + return (np.array(masks) / np.array([w, h])).reshape(-1).tolist() + + def process_data( + self, + data: Dict, + image_dir: str, + output_dir: str, + split_ratios: List[float], + keep_unlabeled_images: bool = False, + copy_files: bool = True, + ) -> None: """Processes the data by dividing it into training and validation sets, and saves the images and labels in YOLO format. Args: - - data (dict): The dictionary containing image annotations. - - image_dir (str): The directory where the source images are located. - - output_dir (str): The base directory where the processed data will be saved. - - split_ratios (float): The ratio to split the data into training, validation, and test sets. - - copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. - + data (dict): The dictionary containing image annotations. + image_dir (str): The directory where the source images are located. + output_dir (str): The base directory where the processed data will be saved. + split_ratios (list of float): The ratios to split the data into training, validation, and test sets. + keep_unlabeled_images (bool, optional): Whether to keep images with no annotations. Defaults to False. + copy_files (bool, optional): Whether to copy the source files to the output directory, otherwise move them. Defaults to True. No return value. """ images = list(data.keys()) images.remove("class_names") + empty_images = list(filter(lambda x: len(data[x]["labels"]) == 0, images)) + if keep_unlabeled_images and len(empty_images) > 0: + logger.warning( + f"{len(empty_images)} images with no annotations will be included in the dataset." + ) + elif not keep_unlabeled_images and len(empty_images) > 0: + logger.info( + f"{len(empty_images)} images with no annotations will be excluded from the dataset."
+ ) + for image in empty_images: + images.remove(image) + train_images, val_images, test_images = BaseConverter.make_splits( images, split_ratios ) @@ -114,11 +172,22 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru label_output_dir, os.path.splitext(image_name)[0] + ".txt" ) with open(label_file, "w") as f: - for box, label in zip(annotation["boxes"], annotation["labels"]): - yolo_box = self.convert_to_yolo_format( - box, image_width, image_height - ) - f.write(f"{label} {' '.join(map(str, yolo_box))}\n") + if self.is_instance_segmentation: + for masks, label in zip( + annotation["masks"], annotation["labels"] + ): + yolo_box = self.convert_masks_to_yolo_format( + masks, image_width, image_height + ) + f.write(f"{label} {' '.join(map(str, yolo_box))}\n") + else: + for box, label in zip( + annotation["boxes"], annotation["labels"] + ): + yolo_box = self.convert_to_yolo_format( + box, image_width, image_height + ) + f.write(f"{label} {' '.join(map(str, yolo_box))}\n") if copy_files: shutil.copy( @@ -131,13 +200,13 @@ def process_data(self, data, image_dir, output_dir, split_ratios, copy_files=Tru self.create_data_yaml(output_dir, data["class_names"]) - def create_data_yaml(self, root_dir, class_names): + def create_data_yaml(self, root_dir: str, class_names: List[str]) -> None: """Creates a YAML file for dataset configuration, specifying paths and class names. Args: - - root_dir (str): The root directory where the dataset is located. - - class_names (list of str): A list of class names. + root_dir (str): The root directory where the dataset is located. + class_names (list of str): A list of class names. No return value. 
""" diff --git a/examples/generate_dataset_and_train_yolo.ipynb b/examples/generate_dataset_and_train_yolo.ipynb index 77344cf..2a08030 100644 --- a/examples/generate_dataset_and_train_yolo.ipynb +++ b/examples/generate_dataset_and_train_yolo.ipynb @@ -1,436 +1,451 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "# DataDreamer Tutorial: Generating a dataset for object detection, training a model, and deploying it to the OAK (optional)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b5_2ivH03etO", - "metadata": { - "id": "b5_2ivH03etO" - }, - "outputs": [], - "source": [ - "!pip install datadreamer" - ] - }, - { - "cell_type": "markdown", - "id": "c3704c07", - "metadata": { - "id": "c3704c07" - }, - "source": [ - "## Generate a dataset with your own classes (might take some time to download all models)" - ] - }, - { - "cell_type": "markdown", - "id": "M4v-QieP4tXL", - "metadata": { - "id": "M4v-QieP4tXL" - }, - "source": [ - "Make sure you are using the GPU runtime type (in Google Colab).\n", - "\n", - "~8 min to generate 100 images\n", - "\n", - "~2 min to annotate them" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ab1e2f9", - "metadata": { - "id": "6ab1e2f9", - "scrolled": true - }, - "outputs": [], - "source": [ - "!datadreamer --save_dir generated_dataset \\\n", - " --class_names robot tractor horse car person bear \\\n", - " --prompts_number 100 \\\n", - " --prompt_generator simple \\\n", - " --num_objects_range 2 3 \\\n", - " --image_generator sdxl-turbo \\\n", - " --use_tta \\\n", - " --image_annotator owlv2 \\\n", - " --conf_threshold 0.15 \\\n", - " --seed 42" - ] - }, - { - "cell_type": "markdown", - "id": "7a10755e", - "metadata": {}, - "source": [ - "### Parameters\n", - "- `--save_dir` (required): Path to the directory for saving generated images and annotations.\n", - "- `--class_names` (required): Space-separated list of object names for image 
generation and annotation. Example: `person moon robot`.\n", - "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to `10`.\n", - "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", - "- `--task`: Choose between detection and classification. Default is `detection`.\n", - "- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", - "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", - "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", - "- `--prompt_generator`: Choose between `simple`, `lm` (language model) and `tiny` (tiny LM). Default is `simple`.\n", - "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", - "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification. Default is `owlv2`.\n", - "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", - "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", - "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", - "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `\", hd, 8k, highly detailed\"`.\n", - "- `--negative_prompt`: Negative prompts to guide the generation away from certain features. 
Default is `\"cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy\"`.\n", - "- `--use_tta`: Toggle test time augmentation for object detection. Default is `False`.\n", - "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", - "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", - "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", - "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", - "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", - "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", - "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", - "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", - "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", - "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n", - "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7add74d9", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 497 - }, - "id": "7add74d9", - "outputId": "a5389937-2a4d-448b-e2f2-6be98018d9be" - }, - "outputs": [ - { - "data": { - "image/jpeg": "/9j/4AAQSkZJRgABAQEAZABkAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAHgAoADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArlLbxtHcePJvDf2FlgQPHHfeZlZJ0RHeILjghHBznsRjit7V9Tg0XRr3VLo4gtIHmf3CgnH14rx82/jDTPBdjq1zoNmHsLs67PdJfEzOWLPKDHs7xuy43cAD0oA9trMh8R6Fc3EFvBrWnSzXA3QxpdIzSjkZUA5PQ9PSr9vPFdW0VxA4eGVA6OOjKRkH8q8D03/hHP+FCN5H2X+3vNPlbdv2r7X537rb/FnGzGP4fbNAHq7eLorPxh
q+m6pdWNlp1la20sc88gjJeQyAgszY/gGBgd+tdC9/Zx2P26S7gSz2B/tDSAR7T33dMe9cNYaVZ6h8Xdcm1C0guJodKs1XzEDKpYybsA/TGfTPrXK2620XhzwvbaiEHh+28S3cM6yf6pQskwhV88bA+Bzx0oA9TuvE+kQ+HbzXINQtLqxtY2dpYbhGQkD7u4HGScD6kUzQfFmjeIdGTUbPULNlECTXCLcI5tty52yYPykcjnHQ1wt5Fp83ivxjHoaQPYt4d/01bYAxm6y+zOON+z+lV9SbTX+E/hJgbY6YZtM/tcx42+TgbvNx23bc596APUdO1fTdXiaXTNQtL2NDtZ7aZZAp9CVJqe5uYLO3e4up44IIxl5JXCqo9STwK4O1/sz/hbtp/wj/2bZ/ZEv9o/ZNvl43p5O7bxu+/jvj2qf4i/Z/tnhX+1Nn9if2qPtnm/6vd5b+Vvzxt3468ZxmgDpJfEujJoVzrUeqWU2n26kvcR3CNHkdtwOMkkDHqRWR4B8SXnirSDqd1e6PKsqRultp7FntdwJKTEsfmwV7L0PFc48emyeNfFaaMls+nHQB9uFuAYvtO59mccb9n9K6r4eQxRfDzw60cSIX023Zyqgbj5a8n1oA1b7X9G0y4S31DVrC0nk+5HcXKRs30BIJrRByMjpXmHiTU11DxB4hsSnhiygsIo0uZtVtjNNcho93ygMvygHaOTk5xXR/DKeW5+Gfh6SZy7/Y0XcTk4HA/QCgDcv9d0fSpo4dR1Wxs5Zf8AVpcXCRs/0DEZov8AXtH0po11HVrGzaUZjFxcpGXHtuIzXDeHP7B/4SHxj/wkX2D+1P7Sfd9v2bvsmxfKxu/gxnp3zS+INUju/FN7ptunhiyisrCF5bzWIPNM0T7yFjAZfkXByckZbpQB39xqNlaRwyXN5bwpOwSJpJVUSMQSApJ5OATgdgaLHULLU7YXNhdwXduSVEsEgdSRwRkccV4xp8EOsfDD4d216qz27+IvKKMpCtGrXIVdp527VAwe3FeyiXTtNe3shJa2r3DMIIAVQyEDJCr3IHJxQBYlljgieWaRI40G5nc4Cj1JPSqmna1pWsCQ6Zqdle+WcP8AZp1k2n32k4rN8b6rHong3Ub+WyhvUjRVME/+rbc6rl+D8o3ZPsDXCaXLNB8WtBje90CWeaxuVmXRrcxgR4VlEhLtu5GV6YwfWgDvPGeuXPh3w62oWkcLzC5t4dswJXEkqIehHOGOPetKx1jTNTkmjsNRtLuSA7ZVgnWQxn0YA8fjXI/GLyv+FbXvn7vJ+0Wvmbc52+fHnGOc4qjP/Y//AAsfwt/wiv2HzPsd19p+w7dv2byx5e/bxjzNuM+9AHdNrmkLqY0xtVsRqB6WpuE80/8AAM5/Skv9e0fSpkh1HVrGzlk+4lxcpGzfQMRmvFNF0ifWPAAgu9f8MafN5jNdS3NoVvbe5WQlmeQygiQMOuOntXYeIdT+2eJtX05l8MWa2FpD9qu9YtvNe4V1LfKoZfkHI6nkmgD0K51GxsliN1eW8AmJEZllVd5ALEDJ54BPHYGlsr+z1K0S7sLqC6tnJCzQSB0bBwcEcHBBH4V43plvFrPgj4X2+oILiFtSZGSQZDKiTbVIPbCgYPbivaIYIbaIRQRJFGOiIoUD8BQBJRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFR3FxDaW0tzcSLHDChkkduiqBkk/hUlZXifTZtY8KaxpluwWa7spoIyTgbmQgZ9smgCno/jC01e+gtDY6hZPdQG4tDeRKguYxjLJhj03KdrYbBziuhrhNPmu9f8R+HJhpN/YppNvM1213btEokaMRiNCfv/xHK5XCjnmr3jiDVLPw/rGtaf4i1CyezsZZ47aKO3aIsiFhnfEW5I5+b6YoA62ivIlv/ExAP/CXan0/
597X/wCM04X3iU/8zdqf/gPa/wDxmp5kOx63RXkn23xL/wBDdqf/AID2v/xmgXviX/obtT/8B7X/AOM0cyCx63RXk32zxJ/0N2p/+A9p/wDGaPtniX/obtT/APAe1/8AjNHMhHrNFeTfbPEuP+Ru1P8A8B7T/wCM037b4m/6G7U//Ae1/wDjNPmQ7HrdFeTC88Sn/mbtT/8AAe1/+M077V4kz/yN2p/+A9p/8ZpcyCx6heWdrqFrJa3ttDc20gw8MyB0YdeQeDUkkUc0LxSxq8TqVZGGQwPBBHcV5aLjxGf+Zv1T/vxaf/GahvL7xJa20k3/AAl2qEIpP/Hva/8AxmjnQWPV7e3htLeK3toY4YIlCRxRqFVFAwAAOAAO1cv4N8F2ug6BpUOo2OnT6tZIyC8SIMy5dmG12UMOG9q8lufHPi2PTRND4nvjNnkNbW23H/fqrOneJvHFysTXHia7iDnvbWwyPbMVLnQWZ7ulnbR3ct2lvCtzKqpJMEAd1XOAW6kDJwO2TUX9l6f9ilsvsFr9klZmkg8ldjljliVxgkk5Pqa8q07VPEt3cTrJ4s1RUj6EW9rk/wDkGqfibX/FGi6ULu38VakzseBJDaEfkIRRzofKz2Kw0yw0q2+zadY21nBnPlW8Sxrn1woAqC00DRrBLlLPSbC2S6/4+FhtkQTdfv4HzdT19TXzefiZ46AJ/wCEln/8Bbf/AON1oaf4+8b3UDSyeJLoIh+Zhb2w4+nlU+ZCsfQunaRpukRNFpmnWllG53MltCsYY+pCgVPc21veW7291BHPBIMPFKgZWHoQeDXlUGq6zcaEdRj8Y6sQEyf9HtMZ/wC/FcRP8QfGESuB4out4bC5t7bB/wDIVL2iDlZ9C2ek6bp9k1lZafaW1o2d0EMKohzwcqBjmrFvbw2ltFbW0McMEShI4o1CqigYAAHAAHavGNG1Dx5dXSLfeKLpoXjDq0EFsuD6cxGqHifxZ4t0S4EMPiPUc4zulhtWH6Qih1Egsz2y50bS7y+hvrrTbOe7hx5U8sCtJH/usRkfhVi0tLawtY7Wzt4re3iG2OKFAiIPQAcCvmST4n+N0GP+EmuCf+vW3/8AjdOtPiX45nnRG8SzkE44trcf+06fMgsfR9/oWj6rNHNqOlWN5LF/q3uLdJGT6FgcU680XStQngmvdMsrmWD/AFLzQK7R/wC6SOPwryHwz4p1fVjPBqPjLUrW5Q/KBDaBSPxhqnqnivxLYyzrH40unCcp/o1qcj3/AHVJ1Eh8rPbF0jTUhghXTrRYreYzwoIVCxSEkl1GOGyzcjn5j61Fd6JaXuuafq0+9p9PSVbdMjYpkChmxjO7AwOejGvm+4+J3jeIkL4muD9bW3/+N0f8LP8AHPkFx4juSw/6dbfH/ounzIVj6elijnieKaNZI3BVkcZDA9QQeoqjZ6Bo2nNE1jpFhbNCWMZgtkQoWADEYHGQBn1wK+aB8VPHbcL4kmJ/69Lf/wCN0q/FLx4Ww3iSYf8Abpb/APxuncVj6hurO2voPIu7eG4h3K3lzIHXKkEHB7ggEe4qCx0fTNMkmksNOs7SSc7pWggWMyH1Ygc/jXzM/wAUvHSnA8TTH/t1t/8A43SD4peOz/zMk3/gLb//ABui4H0rP4d0S6vxf3Gj6fNejBFxJao0nHT5iM1Jd6Npd/dw3d5ptnc3MH+qmmgV3j7/ACsRkfhXzN/wtPx3/wBDJN/4C2//AMbqWH4o+NW3eZ4muBjpi1t//jdDdh2PpaPSNNijto49OtEjtXMluqwqBCxzlkGPlPzHkep9auV8wD4neNDx/wAJRcA9s2tv/wDG6RfiT49Z9v8Awks2PX7Lb/8AxulzILH1BRXy63xL8f8AmFV8SSkDv9lt/wD43UbfE/x+gy/iKYD/AK9Lf/43TUkwsz6mor5W/wCFqePP+hkm/wDAW3/+N0f8LV8ef9DJN/4C2/8A8bpiPqmivlb/AIWp48/6GSb/AMBbf/43
Qfir48/6GSb/AMBbf/43QB9U0V8q/wDC1fHn/QyS/wDgLb//ABunf8LU8d/9DJN/4C2//wAboA+qKK+VT8VfHg/5mSX/AMBbf/43Sr8VvHWOfEk+e2LW3/8AjdAH1TRXIfDDWNQ174f6fqWqXJubyV5w8pRVLbZnUcKAOgHauvoAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK53x9/yTvxL/ANgu5/8ARTV0Vc74+/5J34l/7Bdz/wCimoA4RPuj6U4HBqNfuL9KcKxKH5oFNFRTXlrbOEmuI42IzhmAOKGBYzSjpVL+1bD/AJ/IP++xThqun/8AP7B/32KLoC5SYqsNV0//AJ/YP++xR/amnf8AP7B/32KfMgLY4pwqidV0/wD5/YP++xSrq2nj/l9g/wC+xSugL4ODii7gkuLR4oioZhjB71yfiTX4YY0S1u03MRko2e9bB8U2UNkjLKkkoAO3d1/GpbQ0c7qwtIYHtJlkt5VABBHB+holk/0KFIree5dSBE5OFzWd4l8SW2qGIC3eORXB5IOR9a6vQNas9XsjbLC0Mka/KCOuO4pDIPDGpT3FxPbXCeXMBtK9MVzHjK2vrS7igublpYiCyDPStdp57fXrq4mZI5Fj7HGfeue1Ca4voPNuHMjn7pznApc2pRgTLlcCiC5kjt3hVjtbr70SIfug81D0O0VqrGbO3tNWuk8BzQsrRxfcSQDrXEo6zXcQuGYRlgGIrXfXZG0JdNMeFXv61Q02zhvrsxTT+SCpKsfWpStcbPWdHvdP03w7PBHdbpFHmRsxyfYVxOv6zJql1vkjXcF4ArV1rTLSx8E2kpkH2kMoO3v61xhnWQ4h3FzWbVylpuUni+Z8qST+lVoyySDZncOmKsyTMuVxgnqajtuLhT1571tG9rkPc00c+V++Lbz3okGEG7Le9W1spruRI8YyRjFOvbAwXIhY7dvXmsOZGljIuYkkjDA4q9bWsCaU5l+aVx8q+lV7uCO3iOGyT0qms0wRijcKOc1rB6aE9dRsVsy7z0I6UpbcCHXHvUnnlUBLAk1VZjyT3pptvUWiEYKmQvNNBwaaacuCwBrTYgtWjIxKMm4mrLpGqmIQ7fVqorujlHlHJp0ktwoIfPNZyi29C1KxIbZZGVYySe9Biw21ZGyOoqYK0USEcMRzVaRthYrJ857Uk7gyxHKY4iWIJBqGSfz0Ikbjtiqg8wqVJ60MjwYzz7VagK5Hup1JndzjBpa0JClOKSg0AJ3pw5pKBQA5VViQTzU62iNtKvwOtQJGznCjNTIrIhG076mQ0fSvwc/5Jfpn/XW5/wDSiSu7rg/g0SfhdpZPXzbn/wBKJK7yqEFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFc74+/5J34l/7Bdz/6KauirnfH3/JO/Ev/AGC7n/0U1AHAr90fSnCgKdinHalArEoUU7SVVtavdyg/uIuo93pPwpdHP/E6vf8ArhF/N64cx/3aXy/MqO5o219a3Wo31kkJEtmUEhZRg7l3DH4fSrnlx/3F/KvNtVl8PN4l8UHWHQv5UP2UsT9/yv8Alnj/AJaZx056e9RXzy3F7Yr4gvNMiQaXBtOpQtLH5/Pm4IdQr529cnGMY5rw/qydtbaL8lt3/Q2tLsen+XH/AHF/Kl8qP+4v5V5tf3Fn9l0fTb670u7RLNnXUdRheRZDuACom4HfjBzuz6U3S/s+tW/gtNUkE6iG7EqSt98KAAHBPPQcHOcc5pfV7R5nLTXp6/5BaV7WPR7mS1tLWS5uDFFBEpd5HwAqjqTVTS9W0vWDKtlJueIAukkLRsAeh2sAcHBwehrN8WrAfB1xb2nlskPksIYiDmNJEZlAHX5VIxVKz1zTrzxjPqlrdxSWNrpbJLcKflLFwwXPcgKx
x23e9ZwhGVNy/rp+ZThJO1iTxNq2n2sqwtHOZev7q0kkH5qpApdI8U6SdOguLictHPnyAtu7PJjrhAu49PSsPxNqbXOnF7M/vdQKw2zr/t/xfguW/CrU9hpenWmlXI1i10qezR4ITdJvjdCFypAZTn5VIII/GnCEGlzXu7/h8u/6icZLWx0txHYapp41OynSSIg7CBjkHBGDyCCCMHkV5xd6w1ldubBWM8c8hcgcY3ng1uaAZodIu5PtMkwur+adC0ezchIw23qoJBIHXBFctc6qI5bmOG2InM0iyORwRuNejlqjCtOKewOlUklaL+4sQreazdSXLoS7DCgfyrN1C1u9Ol8qdDGD0BNbMOpXFjp0DQNEsqHdgMCSfcVh6hdXWoTtcXbGSQ+g6V6/NG+4fV61vgf3MznG0kgmoI3G8gDJqZllyV2Nj6UttCUdiUPI9K0U4pbk/VqzfwP7mROrPzS2C51CEFsAODT545duFU/gKks7UNzKrKQeD0odSNtwWFrX+B/czuvEl9bXmlWsEbK8i8sR2riLkeQpeNcVoBljhIQ5b3rLu5JmXYqMR7CsKbV9zWVCtb4H9zM9iSc+tW4FOFXyDk96dZwouTPG5Pbg1dEssUP7tCeePUVtKotkZrC1t+R/cy1bMYZY3SYhl9amu5klmMjne56tWZbSyiQySIcnjkUtyJGO2LJ3dqx0va5To1UruD+5lLUnyMKRgUy2sbq4sZ7mKF3hi++wHArVuvDMlvoB1GaYbyeEBqPR9XvtP0a/tIY99tMMSHbnbXVFJI5pPUzHCeWrKh6c1CdzdFNXhKDEAgwB1zU8d5CIgiIvmDuRU3a6Ba5nJZSuVJUhCeT6UydY45SIzlR396uzagTCykjJ446Vmu241UXJ7iaS2EDENuBwadJNI4G45xTQCTVmKxaVA2QBmrbS3Elch+0ykcnJx1qHnfknmrklpt+4CQOppgtXEgBXHekpRQ+VkcqyQsN4IyMioi+45NXtRdp5UyPuriqZt2z8vP0pxd0JjadimjIPNLVCF7UlL2pKACiigEjpQAK7ocqSKspcgghgc460zzcxhSqn8KFVHYAKRUPXcd+x9LfBo5+Fuln/AKa3P/pRJXeVwfwbG34XaWPSW5H/AJMSV3lWIKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK53x9/yTvxL/wBgu5/9FNXRVzvj7/knfiX/ALBdz/6KagDl4Y1kiUY7UGxwc9qkt4ym36VYYZzmuc0KghUcVV08Imu3uSBmCLqfd6vsB2qpcabaXTb57aKVwMbnUE4rHE0fbU3Tva41o7kWm29rcW7PKFZt5Gd2PSrn2Cw/uJ/32f8AGuP8G6ZZ3GkTNJaxOwuGGWUHjC1vnRNOzg2UH/fArxqWTxlBPm/D/gnq5hWrUcVOnzPR92aX2Cw/uJ/32f8AGlFhp+fuJ/32f8azP7D07P8Ax5QY/wBwVjWsulya09gdPgypIz5Yq/7Fj/N+H/BOP65X/mf3s65rHTUGWRMf75/xqiY9LmiYRukcg9XqPUNG0qLT5WeytxhTjCDNYFh4V0/UdJhne2SORBk4H3vrR/Ysf5vw/wCCH1yv/M/vZbmZEB2zoTnHBq9a2cE8Sk3sW89sivItXto01GYQ2/lxh9oHWqiRNDMuVwc0f2JF/a/D/gi+u1/5n97PbdRtLeERLbuhckbsv1rhptPsItUBuioWW4kJVWJ4DnkmuYEMt7crHDA0krHACiusuPDt3Y+Fb+6vUVZ1QeWM5IGea3w2BhhpO7Tv5GkMZiU+aE5L5soajf6HHfvHDYxpEh4AkY7h+JpLa/8AD9y4SS08oHILB2yvuOa4lnJPJJ+tIGPTOK7ng4s1WcYvb2kv/An/AJm7rdtcaddlYHaS3YB43C9VNVrOaSVWMjcj2rpvDPjePS7CC0mRZUDEMGGfpXp1vZ6D
qVqk0tjbqzjJGwCs24R92UUV9cxkvejWl/4E/wDM8HmnlT7r/oK0NO2y24eb5mzivXrjRPCkfD2kAPYY61VuPCnhyWP93A9vuGQYicVM50WrJDji8fv7WX/gT/zPLZ4zuIh7dapy3GAQCVYV3cvgW5iuMWNyk8UnQyfKV+tSj4a26fvdQ1IggZKwr/U1UFS6pESx2O6VZf8AgT/zPNftU4IJc4+grVhaKSNWJwMcmui17wJDa6VNfaTdtdRx8vG4AYD1GOtcStyYUWKQEetbOlTmrxSM/wC0cbF61Zf+BP8AzLkk7JIdqhl7ZqR7hZUVoU8pgQpOc8561ReeJ51AbIrVTTwZbeESD9+VJJ7ZOP6VnUpxhytLqjvwGNxNZ1YVKkmuSejbf2WbviXQJdN8NLOt95iNgsjc5J9K5zTb14vC+qW4gdhIVzIBwtdb4y8OvpWiRzG9lmh4Xy3OcE9xXNafqTW/hnUNOWNcTHJY9RXUnoeC9y7f6LpA8F297aT5usDeN2cnvxXHQ27zPtXr710TabbR2VsLeR5GkG5gexqK4ghgQuqEsvUZqeezsVy31MSSxmjY7wAB3zVcrg4qzcXDSPnJqvnnJreN7ambt0EyQKs2xkl+TcQg64qHJKgAVYijkWP0zUy2BFtJVQgsflXtUUtyZ2JAJA9BTTApwFYse9aN7YxW1tAsPnbpFDyMwwq1zuUU0jRJtGTKfNiyG6dqgR2izt6mtTVoLSBLeO2KsQn7xw2cmsogdRW9NqUbkTVnYaVLc008U8HHWh9p6VqSMooooAKWminDk4zigBaVXKNkVOloHJHmDj0prWmHAVwQe9RzJ6Dsz6U+DRz8LtLPrLc/+lEld5XCfBxdnwv0xc5xLcjP/bxJXd1YgooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKp6nqlpo9kbu9kdIQQuUjaQknoAqgk/gKALlFZB8T6Mvh+DXTegadcKjQy+W2ZN3ChUxuJPYYz7UQ+KNFn0W41db9FsrZis7yo0bRMMZVkYBlbkcEZ5HrQBr1zvj7/knfiX/ALBdz/6KatDSNe03XUmbT52doGCyxyRPFJGSMjcjgMMjkZHNZ/j7/knfiX/sF3P/AKKagDCRgEX6U1nGM1DECUX6VneJLqSx0dpYmAfcACawtY0uaryRxqGc4FThFKgjvXO6tNt8MW9w0mXIUkr3q9a6jG1xDbeZ85iDbai7uM5PwRqBSSSxwNrOz59+B/Su0kuIkniicgPIcL7149p+qSaVqqXK8qp+ZfUV20+u297qWkyRhgTICQR6jFZ0U/ZxPSzl/wC31fU6TUZHt4ldOSD0JxmuAN5KnivzktTHIf4GPX3rpPGmp26xR20bstwpD5HYVw2rX5n1mG7LskZUYbGOnWtDzGeqS7r7SLozxfvBGdqpz2rymz8U6np0MsAlYlvlw4+7XoHhXxDFeWlwVRiIxje3Q8VxV3YWd19svp5BHI8jFIwelNNJalWb2Mq51OKSxjiC7pN29mPrWhonhq68RyeZGRHEv3pGHFZWlaTNqurRWcQ+8eSew9a9pisYNH02CytlKjozDqfesa9T2a03NKVNzepk6XpVtoCbLbMkrHBcryTVHx9qaWtilkVLNKP3lb9qwn1CSZvkitk4B7muL1APqniBnYB85KnPCr61wxbcuaR2tWVkeZXCqs7CPOzPGaEt5ZWACHn1rtx4cg82e7mZW8vkKD1PrS2aQXKkLCrOw3L0BArv+s2Whx/V9dSv4Y8Nqsq3dxEH2YIDHofXFdyJhaOCXJ3Hjgtgn6VjQajawT7IVkSZV+bzFKj8OxFY91ql2kjKWhVGJYMrFsfnXHPmqSuzrhywjZHZskt05KSiOTrsZfvfnV6ye7s7kTpDIylQJY1O7afUD0ribTVI7RX3Ss8/DCVn3cnt7Vu6F4pVrwqZFJfjHc1k4SWpfOnod5HJ
b3sZkhIWQDowx+Yrzjx/f6hpNwkoUm1kOC4HRvQ13UmrWuAZsK3RWzisHWvs2vWFzps3ltuGUIPPsa0g4u1zNqSvY4/wnr1xqNzcWynAkgcHI6fKa4uQBnbzeTmuy8M6Nc6NoeuX8kbJJFEY43HXPqK40RPOxOe/Oa7qair8uxyVZN2vuUniZJcoCRnjFb0EJQQlpjliGLD+Hn/61QpGioFCg+uafbKXO1j1alXldL1R25UrTqf4J/8ApLNrxVrDT6ZDbx30k6jHDAY/OsDTpI20u/aVcyBQE4o1acvthVQoWrGiRRvpGrCRwpEQKg9zW0fhPMe5Ttr/AMuJQZPunG2o7y986QshIyKWOBHiDKoXH3veoLi32coeKFy3HrYrlMjINRNkUZYHGDUjgCDlTuPetURYjDnj2qVZyzL5hJXuKgCMRkAke1aOkW0U1wwuHEQC5Ut61NSUVG7BblkSxyRgIhVR61q/2nLqCfZ7qTbAsYQKqjLY6DNJf2sVrFD9pbfM65Yx9B6VluZreVQiOso5G5ea40o1NjXWJNrVvbIsYtbNoQCVdmPJNYrROrbSpB9K6iC1abTQ95LuyxcAY3BvSsVry3lvXldSoUbUX/GtaFR25ewpx6macY4pKCKOldZiJR2paKYDaQnmnGkA5zQBLHJhdvQetOQZzhunSoQCe1SzQyWsgR8ZIB4PrUtDPpf4N/8AJLtLz/z1uf8A0okrvK4L4MnPwt0o/wDTS5/9KJK72qEFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABVTUdTsdHsnvdSvIbS1QgNNM4RQScDJPvVuigDx7QNX06TwH4EvEu4Z7XRriI6kI23/Zt0EqK0gH3QGZc56de1Wr9xqR13xDZK9xoo1nTrhpIkLLPHB5fmyKP4lHHI6+WfSvV6KAON8P3ttrfj3VtY0qVZ9NGn21qbmPmOWZXlYhT0barqCe27FM+I+l3k/g/wAQ3ceu6hbwJpk5NnGkBifEbZBLRl+e+GHtiu1rnfH3/JO/Ev8A2C7n/wBFNQByaXYiuIIH6yqSv4VieO70R6KI0ZS/mDcKxJNf83xfpq+aGtYu4/2hzmqHjC4iudVmaGTchx344rn3Luaeq380vgm2VsKzY6VhWery2+ppfTszYi8sbTzVFr2eW0jtmkJiToKrSZ247VXKriuRygF+a6mw1GDydPZlG6BgGPcYrmCu5xx2pdzK42k89qxoq9KJ6ecf8jCr6nSeJNSivtSlliIKhdoNcve3jXKxRSYxEMDFSybgp7VQYfvM1pFdTzm7noXhzXbSx8F3Mb2Uh25BcLkMT71yVlpl7q1wkdvG8jMePRfrXW+Bjd3llcadcWw+wSRkeYRgA12On2VlodrHBbJlzhRxlmPqayq1VD1NIU3P0Knh7wvFobQfKr3Tj98/oPQVoa3OI5EBYAnOKuXN7FZA72zJt5zwfwrgNe8QSXEnlIdpJyO4P41wTbmzthHlOigBGgFFK7rmYguT0HT8a5yWW303TbuSF2eZn8sSdcY4rWN2lx4Ft3DAOp+mGB5rhNW1EjzYt6ARuCir34yM0UoOTsOcklcu67dNDArpEjHaEZ5HDMw/3e1ZgvPtdmr3Esqoh/doiBcn8KzZLh7u0LSyDe7knjp9apRhow0sTdDgY612xpe7ZnNKpqbt7e7I4mllnFwRxGknUe+Kzf7Wka4BwyocZLOXqpJdDy9q9/vEDkn61Wzk47elaQpLqZyqPoWZ9TnuWbJA54wMVLo9xLFqMbiQqCeTVNVLMAO/FdHo+nC7wioMkcsKqpywg0yafNKSZ2um2lxrSEq29QRg78Mp9vWuZ13VtQ0PXoY48hgMEMmMjPevRfCUaWEX2dlIlz3HNaPifwnba5ab2T98nzI3cGvOo2TudtVvY4fw3qcerWV7p91OwkdWKAHBz6CuJYLCQDxzWtotvcaZ4v8As8sb
K8UhGOhIrKvZIptUnZl4MjYA6da6o2Tt0Oao7pMpzs3mjnI9KuRqoiBVznGTx0pl5ZtvSSJGz1wRircMuJkbyRkDBQ9DSrP3Vbud2VL95Vv/ACT/APSSGa1jliJQZlPTJqS2s2tdNu0uVMbuoK7u4q09oZQXshl25MP8Q+nrVeWWZ3miuNzMseApHIq4OXTY85pFK0ANuOOOjGq9wphkaGQ8fwMeMircVtdfYhIsZAz/ABcZqO+Xfp4M/wB9D+79xmrjuSyjdokSIVYlj1qO5BCopOeAadblnWQcEIhbBqvJIZG3HitkmZvUdHK0alRjFPhneGdZTg7T0IpLSRIrmOSRN6KwJX1Fb+p2lpfQG+tmSJSPunA59B61FSai0mtGVGLauiKxvYLyYRTo7uzAIuf1zWxrGlzXc0l5FLvKFVVRjIHQ1zsGj6hEYLlQo3MCMMCV+ordE8umOyToXJAKug/M1yVElJOmzSG1pFTVrUwW6K9ywiMZYL/EW965cqVPIIPWuwW5GqSNNPGxVBtyCNv+eaxJLASu1uh+dCWklYYGAK2oT5dJE1I31RWgt4WsbieRyCmAgH94nv8Ahmqcu0zNtOQTwa0/KW7c2ts4S3RwSGOC7VXuo0t5ctjzP7gXaFreMtSGtCrIhjA3Yye1R5pXJdtx5NJitUQFFFA60wFzgVYlAntI3UYaIbHyfyNV6sWaCab7P080bQfftSY0fSXwY/5JXpX/AF0uf/SiSu9rg/g2jJ8LtLRxhlluQQex+0SV3lMQUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVzvj7/knfiX/ALBdz/6KauirnfH3/JO/Ev8A2C7n/wBFNQB81K7K6upww6GpX3uNzMWPXmoR1AqfO1OayYxEwFxSEcGkRs9acDl8Dml1EgQ4YnHao25cY61Ih+Yj2pHAU5rGh/DiernP/Iwq+ojklSCean0PQ7jXNTW3gXjq744UUy2tJb+6jggGXkOBXr2h6PF4a0hIj5Zu3G6QgcmirUUEcFOHMy1bwQadb29lAgCIAMnuauiERXE1xMFAizjPGTiqmlx/bdR85h8kfIz0zWN4p1eRrm4txwg+XA5J/AVxLXVnbbXlRh+IvFEdwSy/KEbDZ61xtxqEcquXZtxHyFuKi1STExeHCY/h6nPvWHcXO/cGZmY966qdBbmVSs1odZoGrh/D+oaWJcuMyrvPX1xXOXErsi5OTj0rPt5mglDoefap5LjfGCOG6VsqPLLQwdTmVmKs7LvVjwy4wKFkdYX+bAJ79qqsSCDVmOVGBVhwRWriZpjWRdw2nIwM0qAF9pIAPenpGu4kkAenrSoiYO4jdmpbKSuWrW0V7iNSwZSeucCu7s9NjsYo50Yg5GFUcVzeiPb2mZzGJfKIySOB711K6qL50htlh2vwSO1ediJSk/I7qMYx9TqdJmjjgl1BwPlHBzmuA174lawLqSKB9kfQKOP1rb1HUm021+xu6GBR8wxjdXnOuXVve3QNpGixAfiT+Na4OKfQzxL89Ta0bxbdX2qxtewrNLghZiPmXg9T3qstsGZ5W/1gOQtW/h9pEN/qUjXDhIlQgH/aPArb1/RX0SVpHj81X+7Io+WrrSSm0jKMW4psyWuVmtVW4lEL5G0d6qXEgimDBs7V6jvViG3gmIkeIF+prPvAPnCnjiud2dl5o9LLfiqf4J/+ksgmvJZJAA+1uox1FaNhrU0cbPPCJVBx5hA3r+OKxoYm+1AKMkmte5RYbB4yu0kda65WVonkq+5UvYZWZpkuTNATnk8r9R2ptnco0gs7hVeOT5d2fu57iqlncFIGUMdwPGDWhZpa6ncRxeS0dwDw0YyCfcf1qn7u4XuUre2hZ50t7pTuQqFk+U5/l+tVLnTby0z51u6gDO4DK/mOKv3OhXqzyCBUm+YnEbAsPw61Gl1qOlARlpogfvRyKdp/
A8VpF9Yu5LXcraXbQ3NwftMwjiTlvU/Sun0JowJrcKrQhiYzIvXPSseK3t9YIS0jEF8ekIPyy/7vofan2N/Pb3YtJ1EY3hW8wkbSD1P0rCvGU1oXC0WbGrWUUEYNv5p3MTJxkD6Cq8E8kto6SvyflQOvJ9vpVq1ubdzdLJKdxOC8b53emKuzLbRwpCGV2A3bn+8K5FLlXK0a+aMOwOxjCZdjZyQMbQfWn3trFNazyRMcOoYkHg+pqfWJYYo0mjQNuXa2R/EP/rVgR31xFMB5jKFXaPUCt4xlJ8yIbS0KKndcLsXC7hhc1HMpWZ1PUMR+tbVve6fJPG91a4KncZIjgk+69Kz72wngYShTJDL8ySoDtauyMtdTGS0KVKafHBLNIEjjZnPYDNRsjI7IwIZTggjkVpcgMUUgNBNAEnlggYb5j2rSsIbe11i1jlcOw+eTsFOM1mRLtkjmY/KGAx61YnBbW2wN245x68VLu9ClofS3wkcS/DmxkXo1xdsPxuZK7euD+DZz8LtLOMfvbnj/ALeJK7yqRIUUUUwCiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAK53x9/yTvxL/2C7n/0U1dFXO+Pv+Sd+Jf+wXc/+imoA+Z0Pzg+1TOf3fNQxjODmpJCdoArNgLGAFz60+PC5Yd6jI2IBTyMRjFIBAcSfhTnwzADvVuz0W/1C3W6h8gRsSAHcg8HHpW1ofg/U59RR3jtZIozuZTKQD7fdrz6eMoQglKSuj1c1/eY6rKOzZ0vgnw6mn6f/aN9GPNlGYww+6KnubqeS6nuCRt+6vFXtSOtPGYRZ2aKBxiduB/3zWXb6fq6xQlobRleTODO3IH/AAGuKeMpSd+ZCpqMY2Ojt5F0rRw8sgV3G7nivNNZ8SWxknUtuYkszLzn2roPEbatqknltHaRRoMBRO3/AMTXIT+HNRMTJGlmueuGb/CtqeJw/WSM5SdtDnLu9EsYZUwjHO0cVlyMGJGBz6dq6BvBuqfxTW303n/Cm/8ACG6nz+8tv++j/hXbHGYZfbRxy5pHO0tamp6BeaTbpNcPCyM+wbCSc4+ntWYOTXXTqwqR5oO6IaaEZiRQDyDS7fmAoZQDxV6CJA565zUkM+zO4fhVX5s4p2GHValpFJs1tM1j7FKyuN8Uo2yKR2q1PNc6bOl1p85a3PKkdvY1z3A+tSxXUsP3JCAeo7GsnSV7o0VW6sza1HXzq8IWWPbKByQeGrHESjJclFHUmhrpm6KgPcgVFI7SfeOauFNRVo6Eynzbl7TdRntpwbWd4+eFHT616p4f10avY/2ZqYEglGORyD6ivH7fCSAlq6vR7tHuoo/NIUsBnptrnrwvqjejPSzNLVopNH1N7Z1AGATjtWJeZYM4BAIyOO1dp42sUmltrlLnl4gGPZiPeuTgdophvxNhWUDqOQa5JOyTXdHpZcvfqL+5P/0kLHToiEuzNgFfukdT7VavdsmYd6b2wCcZ2imB/wB1HmFkUE5Hv9KqG9eOWVlXLbcB2X7n096tKUpc1zzVypEY0q10pmlv7rJP3YYx8zfXPSq13q80doosVW2iJxhB8x+p700oLgiWaUFicnJyTTLqXyYQEC5zxwOK61vqZX7FZkmkjW6Mh8yRscE5J9avw32pw5huJw1oOouRvX8AefyrO+0zxRHy5WB/iYd6ryTSzHMsjufVjmtOVslux0djqWmWlyJ4NNDyDPMjcD3A7Vu2L6d4svN1xEtveIP3e0AiX61xWmyiK5Uum9RyR7Vdtb2CHUftERaIhwyMO1ZzVmVFmq0jaPeS2xtBEiP+8Vh8xPbHtTTINTuJZRMsLAEZY10mr6avi/To7uzZV1KBcPGcAyr/AI1xqsLJwvlnzkbDrIuMdsVi4Jq63Lu9maNvCt5p0tobmKR0YOu08kd6w7krbTyETEZOQo+8B9av6IZJdXhhBG12K4/CsSaz
nN1IgQ5DsCT7VtTWurIkyy0rzRvO8CDy4wQAvJyQBk9+9WR4iunto4HKtBHnZEVGB+WMVXjuHh0udF4LkRlj1I7j6Vl5wa1ikyG7GrHrl3bTmSApHnjaFGKc+vNO5e7srWdyeWKbW/MVkMSTSDJaqcULmZvRzaDPAyy2ctuzMCWV9xH0z0pV0zQZJEePVpIxn7ssOf144rDOcdKaDl1U9M0cvZjuup0F1oVs5CxazZbV6Ak81Omn6VDereXOsRkoBmOOMtuOMda5+5wrrt9KgL72GcCp5X3C67H098INn/CtNP8AKJMfn3W0kYyPtEmK7muC+DP/ACS3S/8Arpc/+lEld7WqICiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACud8ff8k78S/wDYLuf/AEU1dFXO+Pv+Sd+Jf+wXc/8AopqAPmiM4UU12ywFJG3yg0gbMorMCZj0FSSdBzUBI8wA8gmpphyMUmM63QZ47bw0s8zBIo/Md2PQAMSTXW+FtWM9lNONLv44whmV3jX96AOigMTn2IFcdpNkmpeEWsnYqs6Sxlh1GSRmt5l8RSeGrjSbi3htWe3aNLqB33M5/iIKjaPYE/WvjKvs25JvW/4X/E9vE4epLESklpd9jat/E1vcT3qXFleWk8EBn8qdV3PGOpUqxB57Z7il0fXrfXrRZLWyuoIfKVoZJ1VfMDDJwAxP5gdeMiuJ07RLmz1S4uhY6daLPp8lp5NsW5dipDFioJ6H6e9bmjpPptrZW7ENJDDHEyrnDFVAOPyrKrGjFe49dCI0Kz6fijC1TVNRR9TvoYoDY6fMY5VcN5kgXG8g5wMZOBg5x2rRlKRxNK8gEaqWZieAOuc1FqukanKup2EUcMWn6lOZZGkLCWLcAJFUYw2cEgkjGe9SXemtfi5tLpE+wuqbAjMr5BJbJHQcLjHvW0p0rK39LT8d/wDhjJ4Stfb8V/mZGm6neX9/eQyQpFEkUctuCCH2uXA3/XaDjtmorTU7ltXFjJPZXXyuZDaggwFccP8AMevI7HjpVu18Omx1i6vI7mVw8CJEs1zI53Ddndk8jkY6456UiaXe3Gr293eGyt/IDACB2dpNwxgkhcDvjnnFaupRu7bW/G337/8ABI+qVtrfiv8AMyvGvGkwf9dx/I1xUeM5Nd/4xtIzp1srzAAzjJ49DXMQ2elLjzZJ3H+xKq/+ymvYyytBYez7voy45ZiamsUv/Aor82Y0pwB6mowGkkVR1JwK6yNfDC432N1IfU3qj+SVMreFQQV0m4yDnP27/wCxr0frELf8Bj/sfF32X/gUf/kjCSweJkEiHcRxmorpBHlcfNXe3lncalp8E1j4e1BIwAyT4Misv1Cj+dcnc2hEzicFHXhlZcFfrWSxCvr+TLeT4q2iX/gUf/kjD8k7dxz7U5rZkt/OYYBOADWssEeAGkBHbFJfeXOqRm4jiC84Per+sxb/AOAxf2LirbL/AMCj/wDJGKKKufYoP+f2P9P8aX7FD/z+x/p/jWn1in/Sf+RH9jYzsv8AwKH/AMkVkPGQOlX7FSzL85wT0B/lUa2sSjBvIz/n61JFHEn3byM/TH+NRKvBq36MqOUYtO9l/wCBR/8Akj0HU40vPC9u+wlYW5OSSAa5W2kME+VPllMhWPfj/IrQ0nXZo7SSw2i7Ei7QoPI98VQ1K0ufMMdzA9vI4DBXQg4z15+lcE5Rul5nrYLLsRGU5NLWEl8Ud2vUVL2WWc7yxccYAwKYZpWnkOwKo529adBbzsipHEzsOMqOtPksr6BGaa1mQdSzRkCtfaQWx539k4t9F/4FH/MxT/rHyAMckCoOVVpCoPYZq3LbrLKzeeoPfjp+tMe3jZFJuV2DjPY/rW6rw/pMzeT4vsv/AAKP/wAkVUBa3lO3KjBPtVathUs1t3jjnI8zAYlwf6VVayt84F4oPvj/ABq1iIef3Ml5
NjOy/wDAo/5jLPCrNK3KovT1zVqztEuXecIwgjUMxz0ogs0KtGs4dZBghRyf1q5K8Tab9kgxFErAlickn/aNRLEQ/pMpZPi+y/8AAo/5lrS9bnsopJ7dJWmDhgY04x6E11wfQ/GcQS7xYaqox5g/i+vrXI6fqH9nwBBsZMHvgH3rOZ/OkDG6QtuJBHX+dYxrLmfb5mjynF22X/gUf/kjsT4H1DQJF1U3Vvc+Q4ZYogSZB7e9cr4gvFg1u5EFmsBBJ/eqdwzzn0rS0fxFd6bdq8t0bpFG3ynfp/8AXqbxRBLr8w1JtMubULHhnwSpA7k4FaKvC+v5Mh5Pi7aJf+BR/wDkjjZDss40YESFjIST1B//AFVWNX5LWKRyWvI8/hx+tJ9ihI4vI/y/+vW6xFP+k/8AIz/sfF9l/wCBQ/8Akij2xSxuEPNXPsMH/P5H+n+NJ9hg/wCfyP8AT/Gj6xT/AKT/AMg/sfGdl/4FD/5IrFwGJGSKjzukzV/7FBt/4+4/r/k00WMAP/H7H+n+NH1in/Sf+Qf2PjOy/wDAof8AyRDOhynuKjli24561oTWsbbc3SLgd/8A9dRfYI5X2i8RmPQAf/XpPEU+/wCDGsmxknZRX/gUf/kj6O+DH/JK9K/66XP/AKUSV3tcJ8HE8v4X6Ymc7ZbkZ/7eJK7uuhO6ueXKLi3F7oKKKKYgooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArN1bXtP0QwreyTB593lpDbSTM23GTtjVjgZHPvWlXM+MvGNn4TtbZJJrZb++Yx2i3UoiiyMbndz0RcgnucgDk0AX5PFGix6HFrJv0awlYLHIisxdicBVUAsWyCNoGcg8cUReKNFl0SfWBfotjAxSaSRWRo2BAKsjAMGyQNpGeRxzXFFdN0vS/CmqQ6pDqel2mqz3GoX8JDR+bLHMDKduQqiWTH+yCPTNU7xxfHWPEdorzaINfsLoyRoSssUKxrJKoH3lDAHI6+WT2oA9H0jXdO12OZ9PnZzC+yWOSJ4pIyRkBkcBhkHIyOazvH3/ACTvxL/2C7n/ANFNVDw5eW+s+Odb1jS5Vm0xrK1tvtEfMc0ytKzbT0barqCR647VH8R9LvJ/B/iG7j13ULeBNMnJs40gMT4jbIJaMvz3ww9sUAfOmSoAp4UnDjpSpGZ3VB1NWDCIsx571m2CK4IMi5Hep5Ww49Kjt4JLi42J1BqxeWM8YyVz6kVDkr7lKLtc3tE1hLDRlElpcOsUpQum3GWJI7+ldHrPjRcfvLC7RcnBwvb8a5mGIDwRJL3Ooov5If8AGrXiG4jnORjywBtAHAHavn/qdGpO8lufV5th40ldP7Uv0Ks/je028wXYK/7A/wAaji8f2KzR7oLnhhn5R/jXKXxDPIqocY6isoqWPHWvQhlWHa1v958zKpJM9fvvGdrPAJ47O68vJG75ccfjWVJ41tY1Lm1ugCMZIX/GsVdJ1XTfB7z3qLFBJxErthsHvisK/uwZthHHljGB3xSjlWH2s/vCVRnbHxpZBTI1rdbQ4UnC/wCNMufGGneeA0FypI9F/wAa8/Fwfs0kTE/Mc/jT7W3udRuVWKPzH44PAq/7Jw67/eR7Rnqsw8N6uLKKd2SFZN8rSTpjG09ApJ6kVoR2HgC3A2xpKfaN3/pXP6DoOsS237qXT7RFO0hbfe4/E/41e1eyurC1igbU55ZHbcSAEAA57etdVClGhHkhsJu5ref4Li+7pIcev2X/ABqeC38C6sPLFjAjey7SPyNQWel5tU+0T3DSMMn96QB+FQ3ugRTruikYSDkFjyPoeo/UVpcR0MWl6r4bsifD0qXunck2Vy27Geuxu30rmNVtBqV39pbSxDI6jzEZQ2D9e9a/hTXrjTrv+z78k4HU/wAS+v4f41L8QFv9OMN5ZTMkDna4X17Gjcd7HKHQWYcWSY/3BVdvDCMcmxTP+6Kq/wBuaixybyX/
AL6oOtagf+XyX/vqtIxI5kTP4XRhxZL/AN8iqx8I7m/48Pypf7WviebqX/vo1ImpXZOTdS/99GnyvuF0M/4QsHrp7fgf/r0J4Ity2Gt2Q+7kVcXUbo9biT/vo1IuoTDlpGb6mjlYrlRPBclnKs9o5V0OQDJ1qTULO61XXrS2vosSPBgAMDwNxB7elWLi9mmh2xS7Gz1rBvbm8t75ZmupPNRPlkzyo5/+vXLXjs/NHrZVL3qq/uT/APSWal74DvuHtZYmwPuEbaq28GvaPIBLbTmHG10dS6OvcU6PXNSSBJP7TlIc4VgAfpkYqceJNftYPOknhePvuUEj6itXG61R5qk1sc5qunxCTzbQP9nI3Oh+9GfQ+3vWLJI0o25+UdAO1dhc+MZpXUS2Fpcq3UquP1zWc954b1RyskM+nzn+NCGXPuKcVZBJ3OZHAI9KdMhDqT0YA1b1LTZdPmG474X/ANXKo+Vqsf2VJLpVtcAhWLMCHIUBeMHmqcrCSb2KNtN5d5E/JCsOldGosFsGha5JR3LN3K89h3rHsvsthdpLNIs5BwUUZU59TV+1YzxTLEsSbckbVJ3Cues77GkVbcvaZaxmZ7Zis8Q+ZG24wKrNoZhnlLYQAEqFO4/WrlhGsT+aUkVCMMc5ANVp5CZJbaAhM8BiTk1zKUnJ2ZpZWOcnISXdG5cnqSMc16l4c1OLxH4Rmsb9STEPLYjqRjg1w1tpYWydrlMMXwfUj2rT0G+j0fU45LckR3B2OjZ+764HpXRKonotyIxa1ZyN/aPYX01s/WNiM+o7VCGJXHSu68daYJnS5s1M0i583YM4XHU1wWRiumnLmVzOasxy96OAcU1TikJOasgeRzgHilRcuM+tPRBxmlRczqOvNAyS8GGVe1JZDF/H+P8AKp9Vj2svGKgsf+P6L8f5Gsqv8KXozuy1f7bR/wAUfzR9LfCD/kmmn/8AXe6/9KJK7muG+EH/ACTTT/8Arvdf+lEldzWkPhRzYj+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABXO+Pv+Sd+Jf+wXc/8Aopq6Kud8ff8AJO/Ev/YLuf8A0U1AHz7p2lzWswnlAZAuapXDfabwmPglsYrq40VrLY7hTjrWYNNt7eUHLMSc5FeZHEaty3Op0rJJC6ZYeT5jzLhj0NX0iVkYH5s1KjfuQmMioZGkhkU7PlrnlOU5GqiorQhUj/hXxHf+1B/6LrN1eVZn2h+EPQdBirKuT4PdM9L8H/xw1kan95jjOB8wz39KMMryPpM/dor/ABT/APbTDvJCJGyxbcPpXQ+B9Bgvp5dU1AgWNp8xz/E3YVy05LS8jn0rd0qe6a0/swTFbV3DOvv3P5V66Xunx7ep30WlL4tEl/qRkFqQUtYlbG1f731rz3XtAfTb14ROLiQnKhOSF9W9K1L3xhP9o8q1j8uKAbImVypGP0Nc6dSup55FMu3zmzI+etJJoHZlFYXaVYwpLscAetdPa+GvEdhdRJaZSZuQEkxj6+grFmmifUWkQlY04Ur1471tad4z1axJTel0DwC4+b8x1qmyT1fSI7200hP7WlhM6Al3jGABWJqEsep6xEsTh4iyopHcdT/KuXvfEGuSWp+1NhJI/mjCBQM9B61lWepT211zuEajDYP3cnms7XGz10r0xxSEdjXn8ur6jIirYXRJwSVByAKt2Xii8t4FW8VGbuS1Kwzf1e32pHdRD95Ad31XuK6ohPEfg1o2wZAnlk+jL0P8jXl+qeMIpPIKKVVHDOPX2rvfA2owefcWAcNFcKJoD6jGCPyx+VFmg0Z5XOjW87xOMMrEEe4OKZurqfH2kHTtcaVVxHP84OOM965Q8VtF6GViRWqZGqslSBqpAXUY1OrmqUb1YDUAWNiP1yD6is+8s1nvVgY7w8Z68etXFeoHb/iawn/YP9awr7R9V+Z6mVfHV/69
z/8ASWc6llc2cht5HdEB+UlcgHtmrtvLJMZ7We7WO4PCAgFSK6XbHJjeitj1FZWoaDbSEzwARyA5GO9atHmJnM3WkahppLND5kf99Dkf/WrMCyJcKzBlO4EV6roaw3dn9kmmzMgxkjlh71ia74OkgXzrJ1aM9fY/Sl6juNsdYeW3+x6laNNaMOQ6/qDVfVNANxbC502cz26Dox+eIf3W9vQ1g3Ty2cLWd1HKjr90xuQD+FaWjazLp7xSlzG/RSe/sfUVDLRhxkxXG2VM7WwymtgwOdVC6ehiUgHl8fWt7UNFs/EMJvtNUR3SDMkC9/dR3Ht1rm3hNnq8T/eXIOW4BqKhUdjppVfegRlWOTCswOc1HJstQ++MgYysjckGn3kzrDbORG8f3mOcL7VhapPINwEymN8MqZ5HrXBTi56G0pWHG/e7DwwbxMAcAck1XinmguI5WR8xruBYd6qQwNua5aRoo8EhxwSfQVHc3ksw2GRjGBgKT2rujTSehi5dz0nTtXku/DwaC1D3BbbIIjyc9SPwrzrWtMexvZWTJhL8EjBU+hHrWx4Q1FrLWIkllYRXC7MZ4B7V0PjPw3a2mmfarGNzIDiYg53D1oj7krDl70bnmy9ORSkU/AwcU3H5V0mJJEGfgVLaqPt8a5/ipkcmzIWi24u1PvSYy/rhAlXaciqton+mxN068fhVnVto2AcmorUhrmI9OvH4VjU/hP0Z35f/AL9S/wAUfzR9H/CD/kmmn/8AXe6/9KJK7muG+EH/ACTTT/8Arvdf+lEldzW0PhRy4j+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABXO+Pv+Sd+Jf+wXc/8Aopq6Kud8ff8AJO/Ev/YLuf8A0U1AHjaJGAM5IIpzlEdRjjHFUkE52YQ4OOatzRsJFDDtXgy3PTuWZGhCqVPOKz7u5kYfd4HSniMbMFvmzwKfJZq8S7jzmhNJ3JdzIVv+KYkGel4px/wE1zV9KwLFmO4ktjPeui3AeH3XPJugcf8AATWdpvhm/wDEt9JDYmP5OXeRsAf1Nb4LVs93iN2Uf8U//bTCtIWnlMj/AHF5YmtA3S2do21v3sgxx2HWvTYfhpB/Yq27yEXSjll6H1qlc/DnSZmLGXUIG7lo8j9Aa9FyPk7Hk8jF2zTtu2P3Negz/DGIZNtrULf7Eq7TVCT4Za/sLxpbuAeiyc4q+dMLM4qtbQdWGk3wndFZcYPyBj+GelW7vwR4htM+ZpszAd0G7+VZUuk6hCcSWcyf7yEU7oRtxa2J55JrstJANzRxhQCW7Zx2qnpqXGpaqlvAoM07BckZAJPJrKWKfOwq/AzjFdR4VksNPWS5vzJBOrDyZACMcc1DshrU6+1+FDk+de6yY27iCPn9TT734U2irus9buBMAcGWMFT+VVpfF94oxZX/AJqj+8N1JD4/vIztmSKQ98oVNJMehwWu+HNR0G+EN5tcNkrIhyrfnzXaeEpF0OHS75XYlZsTZPQN2H4GofFGrN4gSyHkBArfNg54Peufs9fVFu7W6B8qVt0RQfcb/DgUO7BJI9o8e6aNU8PtPGN0kA8xcdx3/SvGh8y5/OvbPCmoJrPha2L/ADER+WwPtxz+GPzryjxLpEmh65PbEEQyNviPtTi+hMkZQOKeGpqtbgEyzFPQBc5qNrq2B+VmYfStEyLFpW5qUSVmnUIV6K7fQVGdVKyHEJ2Y455p3CxtK9Rs+b+I/wCyf61ljWgB81s/ttOacupKf9JMMgCfKV7n/OawrvSPqvzPUypPnq/9e5/+ks6FJKm3krgVzya9ATzDOP8AgNaltdJMiurqAexYA1toeWX7WLy7kXR+Vh97B61YOu2kLFJ1KH+LI4NUvtscQG51GfeqOoNb3Sb0uIw45xuHNJsLEGrva6lC8UaqzKcoy9q5RnwTBchlwa0LiRfP8yIPG2OQBgGo
p1S6s2KsrSr2PBqWky07Edjq9xYTK8MrHYflPQiu2ja28RwJOERdQXnbnCzHHf0b+debvtQgFWU981sW2opbrHLbOQ4wHj7N70pRuhp2Oo1FbdbNtOdkt5CuTuyMN3zXOR/YIWAlZ7hhx8owAfx6iuvtb/R9atVvNTtnm8gfNs+/jvnByRWHruiQ2TJfWFxbz2czZj8pySoPTIPNY06PJoaSncx3Zb2RFlvBFhsBHXCqPbFQ3thLbzNn/V7sI/Zh6ioG3zXhVeWLVNcyTW7qp445VuQfwrWzTsib33HRwKbot9rSMQgFW65PpXqkOr22o6Ck0jrsMflzKB39a8oVLe7HysIJAMndyp/qK6XRZZNLsf8ASZAbcNkqhDBkPU/hWdRdS4djndYsW0+/ljUExbiEbHBqpsxHur0DWNKk8RadGtg0e1HLpnjjHJNcDMjxRBXGCCQRW0HzIykrMEICZIqSxCveLux+NRqV+zgZ5p1tEz3CBRk5zTewLc0dajh3q0J7YNU7Qj7VF6jP8ql1LK7B370yBNt5CexB/lWVX+FL0Z3Zd/v1L/FH80fR/wAIP+Saaf8A9d7r/wBKJK7muG+EH/JNNP8A+u91/wClEldzW0PhRy4j+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABXO+Pv+Sd+Jf+wXc/+imroq53x9/yTvxL/wBgu5/9FNQB5IHEe1MjpSPIFkG9c56VG/lvOqqeSMg0ydGTBLA4r51xPTEdI5Wzuxj0oPA+ZjjtVu2Q3SoihAoGSapzjZJJFuB/nTS6C6XOfyf7NwQdvndfwre0ixtoTLeWpdXckAk8gdqyNQ0+O1tlkQuSzY5PFZ0SlkyTznHFb4ZyV+VH0ObPB4mlCpKo4puTXu3/AJb9VsegprOq27ALdOR2BAP860rLxDqc7+WCshAySY8AD3Nebi2Tfje/HvTpLRHgPk3CpKO0p4NdPPU7fieJ9XwH/P8Af/gH/wBsel3esXDploIpowcOyJuCn0OakszBdxEw4jbupBH8jXl2mm2lLrd210NuAPLlAHv1FbMVn4ffHmNqcX4q38hRzVOsfxF9XwH/AD/f/gH/ANsegveS2kYMm1l6Z3H+uaamoWMzAusqt6K24fln+lcFdad4fEYMF7dE55EnH/stRR6Pp8ihknlZCequD/SmpVP5fxD6vgP+f7/8A/8Atj0MWmgzzGSa3tmlIxueLacemcVjap8OLDV1ZrK+aIk5VFYSKp+mc1zB0vSftHlh77Gcc4H45x0q+vhjTGGRPc4/3x/hT9pUX2fxH9WwH/P9/wDgH/2xk3nwr122djAYJlHTBZT/ACrNbwV4phc7bbBHYyrz+ZrqT4XsAeJ7rH/XQf4U9fDGl7Czz3YA6nzB/hT9vU/l/H/gC+q4D/n+/wDwD/7Y4i4tvEemRs91p06xKPmcxkgD6jismOS0uJ082Pyh3ZD/AErpbzTWmvHj0+b91nCicZJ/Ef4Va0zw5cNcKNQgdoepa2YEkew5P6Vaq1Evh/H/AIBP1bAX/jv/AMA/+2Oi+FWqSObuykDKv31JU49D/Suw8U6Da69pzLLKkcsQLJJ6cVy9r4Y8K3AxLd6jbv8A3ZZFH81q5cfDzR3sZpbG8u5pQhKL5ikE9ugrPnqXuo/iV9WwH/P9/wDgH/2xkQfDH7XZQXBvoy0kavgoeMjPrSH4XTIPlnt2H1Yf0p8Hw9umsopSJS7KCyLIAVPcYIqtN4TS2OJxeJ9T/wDWq/a1f5fxJ+rZf/z/AH/4B/8AbEg+HF0vQwfgxpjeAzGf3k1up/3j/hRFoGl5/e/bWHfZMB/7KavxeFvDM3ButRiP+06kfotHtav8v4h9VwH/AD/f/gH/ANsZU3hC3jTP9oW6n3yay30i3TWILM3sZjdCzShOFPPGPw/Wu1T4f6FOuYNSuCf+uqH9MVGP
h0ttMJre7dyucCQf4VEpVJ2TXVdToofUsMpyjVcm4ySXLbdW3uznv+Ec03P/ACGIAfdMf1qa38F29yT5GoWch9MAk1p3XhWZcmW3z/tKCaqJpNvbHJ35HqcYrfmPFsiGXwFIgLZgx7J/9eoIfASzk/6bZRk9mmYH8q1UmniwIpZAB0+cnFNeaWQjzGLAetO6JsUz8PpQP3LRzMOnlyByfpWPceGWgkeOeEo4OCHjwRXRJIY33xPscd1baaSW7mkOZZZJP95yaVyrHGXfhkXCgGfG3phBVJfB7K2RdjPY7K7p3hck+Rz/ALxqAQ55oU2gsmctYaHqOm3Pmw3MMgP3lfIDD8qb/ZN8ty8aRYtnJZVVwdhrrFgU55xikZAGIHIpqoFjzm6s72xvGZ7aaP5uC0ZxStbTXaTTu4URrnnqT6V6QlzPFwHJX+6wyP1prixnP7+wiBJyXi+U0udXHY83hthHtluchT92PoW/wFbKwK2nm4N3HE+Nojxwq+mK6aTwtpWoXBliuDHKe0veqM3g6/sbe48uBbpJF+Vk5I+lROTexUVYm8HXiQu9pPLuHSNwRjB+tYPi+w+y6i7RgGJzkFeme/SqkKXWm3ANxHIh6MrIRkV0DRQ3+lMoUblOdwA5PrTT5ZXG9VY5WDTbi6lSCFMyFd2CetbsejvaWVvciIJKuRLvbB9uD/Sp9P0sz2xuRO6SK2FxgYrW1FQtlI88MLAthFz0965quJaklcuFNWucTq0YEi+uOaZAuLq3OeMH+Va+raPcyW8TxJG7gHcqHkj1rOjUrLbZXacHIPXpW7qRlSduzOjL42x1L/FH80fRHwg/5Jpp/wD13uv/AEokrua4b4Qf8k00/wD673X/AKUSV3NdUPhRx4j+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUVn6rrVloyRteG4/ekhFgtZZ2OOvEasaANCisKXxjoMWm2t/wDb/MgunZIBDC8kkjLncBGql8rg5GOMc4qaXxRosOiwaw1+jWNwwSGSNGdpWOQFVFBYtwflAzweODQBr1zvj7/knfiX/sF3P/opq1dL1ax1qyF5p84mhLFCdpVlYHBVlIBUg9QQDWV4+/5J34l/7Bdz/wCimoA8WubpYzGqKDjADCmzSnySXfFZswkUR5bDCrImSVFRsEnua8Hlsd/OPhv2iQEMQvQ1BJqERuFKn5u5ptwUSB0XB4rntk8al1bg9q1p01IlzfQ39SnE8CMp43dKf4Y0ttX1EQf8s1OXPrWQlwXt1ib7wOa6SwmutE06OWydUuJX3MxGcAVrQjyykn5Hbi3fA0PWf/tp29/4FtHQC2JjkCdfU15xqVjNp968Eww6n866m3+ImoRBlvLaOTIwHjOMfhWb4mvodT2XMWMlQcit2jyjBR8DDGpRJgcNVESGkEtNEs0lnfHHSiaTzYhGVZucja20g1RSYiniUkmnYLlj/hI9Utmayc+YsgCgsckD2PatKG+uEUbLhjjs6hv8DWR5gYDOOOlOWXFGnRBdm9/bM2MPGje8bbT+R/xpZNTiltTGRKhb7xZc/wAs1h+bmnK+D1pILl+1RHlJjljY9AA3P5V0WmzNA78biAFPtn/IrkGkXHzKG+ozUS2E01stzaSsspOcB9vftVb7gmetwQiaIBtpBHcVDcaAOXt12P6xPtP6V5lFrniPSgAbm4CD/nqu9fz5rd0f4hX87ukiRSGPG75fX3B/pSsO50THX9PPyXU5UfwzJ5g/PrTR4v1C3+W80+O4XuYWwf8Avk1YtfHNswxc27J6lTmtNNV8PammJDCc9pFxRoBnw6j4e1QZlha2k77kKEflUr+G4bhN9jeqw6jPP6ippvDOlXi5tLloT22PuH5HNZc/hXWrRi9ldpMOwB2Ef5+tOwxJtE1K2yfIMgH8UZz+lQreXlqceZLEfRs/yNSf214i0nAu7WVlB5Lr
kfn/APXq/b+MbG6QLeW6DPrj+TU9QKS+JrmJ9sipKB+Bq6msaffJi5twM/3l3frSGPwxq82yFQsp4DRkqCf5VBL4SCHdaX7rjnEi5/UU7Eksmj6RdDMEhjY/3H/oaqt4ckifdFMky/3XGKRNPuY3KzSQNj+JCc/lUoWWH7kzD6Giwit9ht45l+12bqnQhCfzFaDeH9NmXMRdD23NmmC/uFGCUcdwwp63kT/eiMbeqNScQuRTeH4cDy4EYj+7IRn881n3GjJ5DmLKzKMmNnwa2xeKP4iarzzrMRuRCB60uUdzjSxDEZOehzQM9hXTypbsMGKP8hVZbO3ZuAg/KizC5g9aQgVsS2UIPBXFQG1j/vLTsFzPBwc09b24t3DQzOmPQ8flVpraPuwqJ7aMD71HKFy1HrEF1H5Oo2kcyHguFGfy/wAKik0HTpInbT5fLV+duSVB9u4qi8YX7p4+lNSV4G3Jkeo9am3kNSZYHhoQWcWwb7iNid6McHnvVO4sZ7RyZ/LkDuCiN255qSTVbmAGRCWABOz1+lU7vxGuoW6RNAvB3o5PKmsJ4e+sWaqr0EmZbdmSWUQqxOGRcAfj61i3cMRm82ObzCGOc8k1av5xeqAhCyLj5duQff261cvYbe3sHMcW2R1UMS3U8HgfhXLK9NWe7O7LnzYyl/ij+aPYPhD/AMk10/8A673X/pRJXcVw/wAIf+SbWH/Xe6/9KJK7ivah8KPOxH8afq/zCiiiqMQooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArD8TatYadZJBeeIF0SS4z5V0fLB+UjIHmKV7gYIzzxW5RQB5D4bvINE1XS9Y1V/L0orqVvHqc6sizSPPHIJ33fcMgV8HgHHGAQKmsXGnnRPEV4rwaKdc1C5EkiFVhjmEgikYfwqSTyenmD1r1iigDkvBUi3l54k1W2z/Z19qXmWrYwsoWGJGkX1BdG574zVf4jwa63g/xDJa6jp8enjTJ/MgksneVh5bbsSCUAZ7fKce9drXO+Pv+Sd+Jf+wXc/8AopqAPB51aU72GAf0rNnn8lgVOcVskrKiK54IGQKydStViRvLX5j3rxqbV7M7JK6uLbTeevOOtJd2wWRlQdRniq2l284MgatV4iYkkfqnDVTajLRkR2KiwiPSSWQbxKBu74waxtW8QXVzMUgYxRLwoXqa6G4iRdGMqsfnmHH4GuQtbcTTlmRnAICoB95uwrfDWcpN+X6noYz/AHGh6z/9tLNjqV1G6/aNzxNxkj+tdBHJhCp5B5FTyJKtg0FzZrG8CDeuQSAehx2FUIHBjwp6dPpW7S3R5KIZJdmVFQK5LZzUjxzSyN5UTsP9lSabtZTh1I+oxS2HYcHxTlk96DtxTSQBRcklWQiniTPeq6sM0E0XAuB+etOEmKpbiDUgl45oAtmTg/Sowsgj/czyRPjqjY/SoGlG3rU0b/KO/FMC1a3mqW6HddrKcYXegq9YWhlkcqFMsrbnKrjJrOSTbyRXT6Rq+kaZaq08bzXLckKOE/GpbGjRsfDE90OSAB1JrRbwvPboWVdyjuKNP8c6QihH82M9yVyP0zXVWWrWN9Ev2a5ikz2Vh/KosVc40W01s3AZT7cVZh1e/t/uzuQOzciuxltoLhSropz3xXHazGunSsH6dvenqguX38bxWNjJLqFsGVFJyh+97YrzSHV/Des311PqU17Z3E8m5Htmwqj0x3rVkujNcZfBAHCnoK5e/wBGtJTcRwr5c4bzIyOdwxyuP1H41cGnoxXOrsvDE1/mTw74liupI/n8meIq4/GopvEniqyZ4rvSo5imQzpuUHH4EVh+DRJpmu294upRxxq2wkDO4ccEEg4/CvWPE0SJpN7fWoDrNAzoV55xz/n3rRCPPLfx8+cS6U/PeK4Vv04q2fG9gOZ7e+h9d0JIH4iuAZpX3bSePeoBcXER3DeF7spxV8qIuejr448PscG+
2H0aNh/Spk8XaC5G3U4OfUkfzrgoGkbDSNksMhjyfzrUht0urOaGVpDMBvifrux1Uj9RQ1ZAtTtF1rT5FBS8hYHoQ4pr6pbY+WVGP+8K53wpqtoc6XeQwOHz5ErxKWQ+mSPy965vWbvWtJ1SW1ku+AcowiUBl7HpSVx7HeHUlM4Zp0EYUjaCOTUc18Gx5d2sRB5xg5HpzXnP9v6iAAbuQt3OFx/Khtd1Ig/6Y+PXA/woswuejSapbEcypj/eFUl1GGMIrXgcrnJJGW+tcD/beqPyLt19OBTTrGokf8fcvPvT5Qud3Lqtt5isLoghs4D8H2+lV5tWtmnWT7Ryv8IbiuGOrakzEC9mwO4c0n9o327m5uM56mQ0coXO3fWLZn3l8nHAznFVn1e3yx3Stk5wEb0x6Vx51G8PBu5iP+uhoNzO/WaX8WNHKFzqZNdhGf3U5/7Zms6XVY/J2w2kpKlmDFccViF3PWRz+Nbfg4CTxNbwSf6uZXib3BU0cqQXKtpqdw9/G+1QpO0rjOQa6bUZJZE27QVXBYjselYdhYZ1FAsZKrNsLH1Brp/FVsmmamIhGQLuJJQSehxyPzBrixcFJcy6Ho5U7Yukv70fzR658If+Sa6f/wBd7r/0okrua4b4Qf8AJNdP/wCu91/6USV3Nd0PhRx4j+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABXO+Pv8AknfiX/sF3P8A6KauirnfH3/JO/Ev/YLuf/RTUAeC3DlHDDnA5qv5nnyBZMgdTXpD6Lp9xFhoVUsOTWc3g+FGLwSnJ7MMivHlTa2O/c5WKFImeRshCMj3qKW4dLeTbgqegNdBqXh+7ADJHuK9AD2rmr6zvYn3m1lCnj7tRGLvqgaS2JLk/wDFOxHGCZs4/A1zunKYUeVgytGjNG3T5vX8q37hmOgRblKkTEYI56GqcVk721vaPLGJlQTRYP3lYfdPvXVhtHJen6nXjdcFQ9Z/+2lzSdVgkS/hu1LzvbhY278f/WrOdPscrwklvLOOe4zVHbNb6ohClWQfMB6DrV+6lE4EpyWdeSe/ofyrqcTyL3Oj0nxzf6TGsKQ2z2/9wxAfqK3bfxl4f1FimqaYse7+NUDj+Wa82iYSQDkZU4NXFiUIG61D0KR6K/hjwjrK7rC+ijY84STBH/ATWDd/De+AMllcRzx5O3nBxXL52n7ua09O17UNPRRbXkyAfw7sr+R4pAUrzw/qmnsRPauMd8VnMGQ4YFT716Da/EC6A8u/tIbhO5Xg/keKsnUPCGs/LcwfZZG7ldv6jii4WPNg/rzRuGeRivQrj4e2F4hl0vUlIPIDYI/MVz9/4G1qxBb7N58Y/iiOf0ouKxzbYwSPSp43pstu8TFXRkYdmGDTrdd0iA9D1p30JLyxeZb5DfOT93Haqs89tbna9wA3oo3Gs3VL6SG5khhcqp6+tZOWZuMkn9auNO+o7nSQX1uz7ROhJ/vfLWkkskbqY2eNxyCDgj3FcilhdyLlYTj3IFTxy3tiRG4dVborcj8KTproK56roXjq4tGW31JjNCeBL/Ev19a2tUvo7+HznAK4/dj+teV2lws6A9x1Fb2naoZIhbl9w6JWcosdyC5kMVywB4qhdmTesyEhgcgg8gir9/EyfvGFZtycxoacdwKUlpJczefaLl2OZIVOCD6qO4Pp2rs9B8YrpVr/AGfqiym2Y42OhzF7jPUe1cgnDZrXg1W8iQRidnjHRJAHUfga057CNyfRfDF55zWevWlusxyUfoM/Ugim23gyzZRGmuaTPEwwy78HH5mqsGoCX/WWOnufVrVf6Yq2jWT8vo2nn/dQrTVQLEcnw7ugNtrfWUka8KPO5x+VX9M8E3yXCLcIQM/65JVZR+GQahMWksMnRIv+AyEUkdvo0jbf7NeNsZ2rOQcetPnFZFXxP4GuNP1YX9ukhsXbdK8M
Z/dH1wO38qyfFkcep6OlykqS3NpwzDq6Hv8A1+ua6gQWUPMK38eOy3bCuK1t5bbWnu1Ez2jjy5BK+5vfNCeu4M5EK7KGAA29STzihSACVww5Bq9f2Zs7jCNutpBvjfOAV/xqn5W8Hyzkg/StBEYI4YHOeCDTWySgHUmnhOnbvzTnjEQG8fMR8o7/AFoAaqhPvZLg/rSF+ArDOD9KQg89SaUqcDuaYEf3X2gcVKMBAT+VIIJGORG5+gqUWtweDFJ7fKaTaAhDFsgevrWv4ZcweI7KUDdsk3EdOgJqpHpl25+W3lJ/3TWjYW02kNNe3ShCsRWIEjJY8dKTasB1kvivw3psciaZp1wbmU72klbhGPPABHSubutVm1S4hknMjMMgNI2TWf8AYpbiGG4RTtI2u3oamVDHcxjsc/yrCpFKnL0Z6GWu+No/4o/mj6F+EH/JNNP/AOu91/6USV3NcN8IP+Saaf8A9d7r/wBKJK7mt4fCjlxH8afq/wAwoooqjEKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArnfH3/JO/Ev/YLuf/RTV0Vc74+/5J34l/7Bdz/6KagDjE4Cgk5xU6SlRgmolACL3GO9JznOa85s7krllnLLwRimbVP3gD9ahCtj73401i4wQcmgLHL+M7PydPSUYw0wGAMY4Ncc9tc3UdtPBLbNIsagKJQrDAxznvXceM5C2jRg9PPX+TV5vexPH5aleZFDLjnAxToL35fL9Tsxv+40PWf/ALaaojksGluJoTJNMgCRgBj79O1Y91PM8asVMb91Pb2rsvDEHmWE0M9tmC2Xckzd371yGobi0xYjiVunfmuqO55D2K1tMNzDoG5rWtsGM4fNc07BJARnFadoYZh+7cxyegb+holC4kzXIPamIpC1EstxB/rEEi+o4NOhuom+XfhvRhWdmi7k65HNKRnmkUg8k/lThhjxx7UgFhmnt3DQyyRt6oxFb1j4y1mxwPtHnKO0o3frWCcdKQhj06UgOym8Y6TqEB/tjRw5OBvixn9f8a5LUBYrdP8A2a03kYBHmgBge44qrN/qiPcUsw2wM467KaA5xo3u71wvJLck9hWtClpYwbgwaYjIyM5/wqtZ+XCpDsMnlj6+1MR7T7YrzGR4mOXCcGui/RGZYF/cux8lQAT0q8s0c6RpcQlJR0z3P9Kh1I6ZFNHLZW48gqMBmJOe/NS3XkLbQTJI7RTZ2B+XiH17ipvfoUU2d7G/8zGYs9R/Wuy0DxFDY3ShijW7H50ZRx7iuPkZQhjcrwPwNNs2EkTLnkDbn27UpLQR7wsWjarADGtrMvsAaoTeGNJudwNoi4OOOK8k095FJiWV0bsQcc1YGt6xaSkJqFwpXjG8kVmkNnoM3gXS2OU81Po1U38CRD/V3jj/AHlBrmIPG2uw/euVmHpIgNaEXxCvwAJbSB/oSKdgNMeELqE/u7mNh/tAinDQNRj6CJvo1VE+ITfx2H/fMn/1qnT4gW5+/Zyj6MKOVCJxpt/GMNb7vowpPscyP5rWrhsY3bckf5zSp4805uGgnH5f41KPG2lt/BOP+Aj/ABp2C41kMahpEdQe5U1T1HSReQFkCsxXp2YVor4x0s9pv++aH8WaQwxiX/vikFzzSS01LTC0X2P7TZls+VIu7af6VA0sRHOiIPYlq7y913TZSWiSUH1IGDWNPfwuTjdiq5xWOZ812OI9Jt1+qsf6092vQNw0+2ye/kEn+dbf26IdSaikvosHk/iafO+w7GKh1HfuWyhz/wBcF/rUjNrbD5Y44x/soi1cOowq3JpG1SA+v5U+Z9gsUwNYGMyqD/vD+lJ5OpuxMl+VHoGNSvqEDHuaj+3wDs36UXfYBrWNw33r9j9WNR/2Ymfnus/8BJqyt4r8JEzH2pk87g7TGyN1w3pTuxWNm3m0628Om1W5lNx5m4qU+UjtzWR5qPcAA554qqZC
3en26Fp1YD7vJrKr/Dl6M7st/wB9o/4o/mj6J+EH/JNNP/673X/pRJXc1w3wg/5Jpp//AF3uv/SiSu5reHwo5sR/Gn6v8woooqjEKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigArnfH3/JO/Ev8A2C7n/wBFNXRVzvj7/knfiX/sF3P/AKKagDhhIdi/MPTNTCdQdrA59azIZg6oCwyatsVIXkdO1eYz0EWA4GT8xHfFAcHK4AqoWZiNjYpDuUZ3U0BleMv+QPEP+m6/yauIawkOkpcsxZ9xxn+BRxXaeLWD6PEe4nUH8mrFhMY1O70q+wsSligHHGc5p0X78vkdeMX+w0PWf/tpjWmsz2Gp28YYm3WMo8ZPDFh1/WsrU50nuZGjAAPX0zXUCz0xdKlurvAkMkjIM4LLjCgfjiuKkchBk8nk12Rs9TxmUpOTgVoWej3144EcDgerDFUQSJAQcEHNdvovj0xGO31O1Eq/dEsIw4/DpVSbWwlYopomt2iZVWdR/Cy5FVZZdrbb2xaMjgsoz+nWvSIiuojfp+oQzg8+TJ+7bH8jVS8t4gPKv4GiJ6GVcA+wPT8qzUirHCIu9f8AQ7lf9wnH6GpFa5i/10Q/3lrdufDNrM2Ytgbr7/mKoPpGp2jfunDr/ckGf1/wqtGGpWFxE/RsN6Gp4I/MBJYA/WqlweovLSSP/aQbl/x/WokgRubS8Un+6xwf1qeRDuXrhCkZyOlVr66CWnydWXb+OcUyaS8jhaOeMkdNwqrd3EUsUKISWDZYY9KXIFxLW0tZ7hIZW2srDcM/eHcD3p3iGOGPVGNuuyN1BUenbH6VLcaOstkl7YzeZIFzNETyD6j1H61VSYXsXkXJxIPuuavqSyvkyWcag8hyMVYkuFjuB5kfmCMBUUtgcdaphXhmEb8Ddk/40soaa5WFOS7/ACj3NUIt7hdwswRFIbhR29vp1qOykEdwOoUnBB7Vs3kNroehpBJEZLqdgzSr91SDyAfasaYLHPkdDSWugzR3GOXcKkuSJGWQdCMGqxkDQhqlR8x7ffNZ26gCoScYqkuqAPIHWJVVtoyCSa1YfvimWumeHJy5u9Xltptx3LsBGc/SiNm9Rmemph32/Z0+u/Aqd7gxpG7W5xIPl2SA/oea0/7C8OsAsfihQBzh4xSL4X0pnyviWycdtyD/ABq+VC1Mw3Oxd7W1wqeuB/jUR1aALlUkP1wK3/8AhEYJlVY9e09lUYUFRUMvgC4ZcxanYvj0Yj+lCSCzMm21E3U6QwwszscAZFaAS483yzEd3oDn+lN0rQ72yujcL5Um0sg+b8Mjit1kvGHzRxgAf3//AK1RK19AMKQuuQ6lT71CWJp08/nSsewOBUZ6UkMuaHpkGr6y1vcFwgj3fIcV07eENHiwHEnJA5fqaxfB3/IxP/1xrs7mN2nikTG6JtwB6Hgj+tXcRiDw5oMciho1JY4ALnr6VNHo2hqCRZxYDbcspOTVr+z33u/mrlny3pjcW496mj05VnWYSMHDE8dCDnjH407gVvsWjRhttrbkrkECMHsT/SlMGnxsNlpGMoXyIwMAd6sS2kJd2kmwG6AsAF4I4/OmyHT2GGu4h8nln94OlJgS24jdAUQKPpXAeKDu8Q3I7AKP/Ha7j+1tItQQb6BTnkB8/wAq4DX7qC51q5nhlV42K7WHfgCpSdxmYVOcL1PTNWrG4dFmgZtzSFd23oNtUzPGpzvH51cjmkRo4owqxSks2ByT1pVf4cvRnZlv++0f8UfzR9CfCD/kmmn/APXe6/8ASiSu5rhvhB/yTTT/APrvdf8ApRJXc1vD4Uc2I/jT9X+YUUUVRiFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFIzBVLMQABkk9qWue8YWOr6lpkNnpkFtPDJMPtsM1y0HmwgHKBwjY3HAPH
K5HGc0APi8aeHp9FbWItRD2Kz/ZxIIny8mcbVXbucnPG0HPap4vFGiy6JPrAv0WxgYpNJIrI0bAgFWRgGDZIG0jPI45rzHSri9tLmHU9Wsbe00vT/FF09w0M5lWIvFLHub5FwiOyDd/tZwuKu3ji+OseI7RXm0Qa/YXRkjQlZYoVjWSVQPvKGAOR18sntQB6PpGu6drscz6fOzmF9ksckTxSRkjIDI4DDIORkc1nePv+Sd+Jf+wXc/8AopqoeHLy31nxzresaXKs2mNZWtt9oj5jmmVpWbaejbVdQSPXHao/iPBrreD/ABDJa6jp8enjTJ/MgksneVh5bbsSCUAZ7fKce9AHl9lqJv7ORbV1iuR8q7hkBvX6Vo6Q96YnXUHiZ1cgFDjI9687a9ME4miPzA5BAwDXRWPicmVjJGPKwMY4JNec4s7VJHTveQ2svzzBlchRg8gmpZZHAdgeg4BHFcrd3NpbzSvFiVpl/d/7BJ5FdLaiXyUedwzsoOB0FK1i07mZ4ikd9BtzIAHMqlseuDVbX76fSdQTVbNFZXTy5lK56dKd4idVsY4ecmYOM9+uf6VuTWiXETRyqrI3VTU03ab+R34qPNgaK85/+2nmOr6vLqdx9on2A4wFQYArFdic56V3F/4IjklzbXLRjP3WGQPxrKm8Eaon3Hhk9BnBNdsakDxJU5djmMDGauac1tHdq9y2FXkD1NXj4U1cBj9nHH+1UDeG9VXrbH8Ktyi9LiUZJ3sbMuu6cLdYokPy9DnBH0NOtfHmoWY8veLi3PBin+YY+tc1NpdxbkicLG3oTz+VQGMKeX/SphTithynJ7no2neJPDl22XjfTbhiMlOYyfp2/SuhEbSQh7cxXsR6eWwyfwNeL1PaahdWMm+2nkib/ZbFU4EqR6lLHBK5WSFoXxypH8vzrGu/D0crPIiLIjHgKMEVm2XxBv41Ed/BFeR+rDDf4VtQeK/D1999rmwkPfG5f1yP5VPK0VdMxJbS4sYyY55VVRzG/PHpzVER2kuqTNPMIY2B24Xoegz6Cu2ka0urcmLV7K4iBBCv1/Hk1wOsx7NQuAChyf4DkfhTVxOxJPbXFmAY33x/wyxciq631vPxdw/P08yPj8xUTSypaw+S7jqCFNIuk3rqJGi8tT3kO3P5015iLBsvtSMbWX7QUG7b/EBUYKwFQ0a+aBuLdwPStbw7bzWk84F9bwO8fHzBs45wfSqFxHc3sjyE+bM524XuaV7sfS5Xa8Nyn2d+IT29D61HcZQLG8eGQbc+vvVmDQdUmkAFnKgz96QbQPxNO1n5bsIcZCjoOtNWvZC6ahpsK3aGI3UUJB48zIB/ECtZNEvEGY2gmX1jlU1z1kwEpU5wa104YVE9xxLSQTQygSxlfrXKXHNzLnn5z/Ounjb96v5VzNxxdzDvvP8AOnT3YSIgBSgdqDj059jTgQO1akBtAGa0tD0ttSvMvkW8Zy5B6+1U4beW8mSGFCzucACu60yyFlapAg6ck+p9aipKyGjUijREVEUKijAArF8QaqlqDaq4DsuT7CtGW7W3ieR2AVASxNeb3969/fzXLk/O2QPQdhWFOPM9Sr2NJbuBV4JPsBTTqUA/hc/hWSGxSE8dK6OVE3Nqy8QzabfG5tI13Fdp8wZqzL431mV2Pmxp7Kg4rnOc04D5sGnyoLmy/ivWJCP9McZ9MCqcmualL9+8nP8AwM1S24HApB0PpTshXHyTySNl2Zj6sxNNBK8g4PtTSfmNIu7vmgLjjJIR9403LH7xJp31pMH60ABHGa3rIiS0jY9Ufj8R/wDWrDYjbjvV/TJ3LrCANvUn88VlXV6b9Gd2Wf77R/xR/NH0j8IP+Saaf/13uv8A0okrua4b4Qf8k00//rvdf+lEldzWkPhRz4j+NP1f5hRRRVGIUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABXO+Pv+Sd+Jf8A
sF3P/opq6Kud8ff8k78S/wDYLuf/AEU1AHy/KgEYGDnbkjB4p0UjIEduQv6U6KdpGAbJVhyM9alljMhEaRKu48c8iuJytozotfVB9pbAKvnDZ5Fa+m63c26sPO8x3kyUkXOB3xWKxNuqo0bq6k5Yd6kt4y7sw3FTy2OopOzRSumdT4ikWWKEqycMCBn5gD7fhXQLtJ3q/I565rz6SQmTJO7POT1HtXTWuqyyKofhuScDtXKvjfyPWrO+Bo+s/wD206LaWAOceuec1FLG4fcgQr3qiNRE0xigkRmXhh1I75q0hkGAuCnetDguhXUoVzkgnp1pojj+fAIK8EVS1C+/4mFkkLow3fMueeeOlaEoz+843rw386oW5k6hoFrfxu0vDHOG9PeuSv8Awo1shkinaRR2Kc16Qi7IVVhnuc1HJbrJjco9atVHHYhwUjxya1eLJwSo71XNet3uiWV2SrwDcRncvBrk9V8HzZ32zj2Uj+orojWT3OeVJrY4/HNPqxc6ZeWT7Z4HX3xxUSR55JG0d61TVrmVnexEcg1oadEs6bZGIUtgkdQKoOhU89KmtndXBU9O3rQ9h9Trbi60Pw9AEsg13dMM+Y3Rfw9a5ubUp53Y7Ms3JY/Ma6CLWjLbM0fh2KTYPmfYWx+lVf8AhI5L/NhHYWsAl+XfGuGWoSsU2Z2nqsWL27P7veFGR6966OLSbdhfwCRRHKQ8EucMnofpzWLqTxvdQWMQHlwrlvrU8eoyWWpWw5dTAhCnv1B/Ohp7hfoQ6vba7pzCG488wk4WdclG+hrNv5d923yYVQFB9cV3eqM91pUV5p07CyyPOtjzggdR3HbIrg7x1yQTliefrVR1JYy1bbdJ6HitxADzXPIxWRW9Oa6KEq6AjPPtWdTQqJIqjIIPQ1YuvBbSiK68x4luV8xQykjk+oFVyjZ74rbsPEmq2ECwJMrxKMKkqBsVEZWKauYR8DzbSVvo8f7SkU1fA18T+7u7Zz6ZNdknja/A+e1tW/4CR/WnjxzcY5sLc/RiKv2jJ5TO8M6FdaQszzx2xmkG0E5JC+xHSt5IViU+ZaEr3MTbv061SPjl++nQ/wDfZqJ/HBx/yD4Pwc/4VLd9x2OS8Z6lbm5+w2ROxeZcjBLf3cVyyJ1Jro7mx0+/vJZ2NxE8jFidwfk/UCup0/4baXNYxz3WqSh3XdsXauP51pFxWiJaZ5kyjORzSL05zXZavpXhrSbw2xe7nZRk7HBArPM3h1Pu6dcP/vSf/Xq7isc70PWlDfNuya3zqmkJ/q9EU+7OTSjxBBGP3OiWY/3lzTuFjADcnjIPalwzDhCfoK3x4pkX7um2Sn2iFMfxXqB+4kEf+7EKVwMmOwvZhujtJm9whqZNG1Fulsy/7xAq1/wkWpMwaS4JUfw4ABpJfEVxLuGyMA/7OaV2GhEuh3mcM0SH3YVL/YoT/W3cS/Q1l+awcspYH61JAs9zIlvCheRzgADJJpu4F9rLT4wS9yZCOwpLaIPeK1vERFGPmIHT3NdlpXhKygsCmoRi4mk5YhiNnsDVq/0+303QbiG0hWNG27uSSfmHeuatUXJJeR6OWQf12i/70fzR6j8IRj4a6eP+m91/6USV3FcP8Iv+Sb2H/Xxd/wDpRJXcV0w+FHHiP40/V/mFFFFUYhRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFc74+/5J34l/7Bdz/wCimroq53x9/wAk78S/9gu5/wDRTUAfMtvA5ljaORc45HTFPllWNjtlJbuQKpx3WyZQpwMcinm6VrrIjXAGCBxmuBxd9TqTVi4rXEko3iQRkcA8CphE8UTyLIFiYcpRDeFohneFHTODUjuEg8zG7ByBis+Z3sWkiBJFkiHyjI4znnFXheRwoFYHeoxn1rPByxb5G3c7lGPwp5gnmlDKhwPXis7L2juehiL/AFGh6z/9tLOn35ivHnII3A8K
eDXRxahstld2Adh2PeuTEMm5Y1Qs2CTjmppZhGmC+ADxgZq20noecm1udbp7wM/mMMynq5Pf2qea4ihmRGGfOIGfof8ACuMt7y4aYeWz7c4wK2Uu0Nzbhl5DZYk54GaZSkdIzbiHRCOed5xmgM5kbgKu3jB5zVMTSO6srgRv90r0pWkRXKyvhtuRlgM0xlgliRkEgjk9qaXj8woZk3A8rkZFV47m3lmMEcmXC5IAP50v9nxS3aXhQCQLt5PHsaNxk9xFG0LGaNTGRXN3XhSC7tzJE3kuSW27cj2zW1l7qX7PhnRTufPGfQVdRfMjwUCtnr71UZNEySZ5ne+F9RtG+VVlH+w39Ky/s88EgEsbIfdcV64tsGYptBZehznP41UuLGKbcs0Qdem3aa2VbuYyo32Od0rxVLbWsNnbWitcEbC7H5T6HFZ0k80utvE6xhs5kKRhc0uqWf8AYOrxTQKxhzuXcPzFdBpFlpuu3JvBM0EwXGwj75rS63MrNOxyMSqJ727k5jVtuai1VjcWlje26kBA0TY6ggkj9DXY2vhdrvQ76AlYpBOfnfgfKfWqradD4WtG8/UIJ/N/5ZJywPYiq5ibGBZardQWLZO0yHof51jyYeUsccnNWLy6M0zP/eJwPQVUzkbj9KpAA6Z7YrZ0/UYYoAsrYI459KyA/wDCBgdyRTGA3EDpSauCdjp/7Vs8f61fzoGqWg/5bKa5NiC54wKAKn2SHzHWHVLTGTMv4VVl1G1Y/LMyj61gYowKfIg5mbialZpnfK0n+8KVtXtACECj/gNYO0UbafIhXZptqKMThzz7YqBrvLZ8xvzNUsYpRRyoLlhpyxJA/E1H5hP3j+VNzxTDzVJCJVeMHlWP402RwWBQFRjpnNMoNFgE3N60bj6miigAPPWjHekHWn0wJrS0nvZhHCmSe54A+teg+H9ItdHUyNKkl0w5f+77CuDhu3jGBx6EcVeg1dxneWJ6VlOLZpFpHpf2gOcoy4zkk/0rP1iYvpU3I5x/MVycWpkrhWKqPQ8VabU/tEDxE8sAOevFc1Wm1BvyPRy6aeMpf4o/mj3H4Rf8k2sP+vi7/wDSiSu4rh/hDz8NrD/rvdf+lEldxXZD4UcGI/jT9X+YUUUVRiFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAVzvj7/knfiX/sF3P/opq6Kud8ff8k78S/8AYLuf/RTUAfLARgrTbCygYJHaoInJkGBzSRzsw8tOF7j1pYj5cwOOKwSZpzFxZgse0vjB6GhZ5gCd+cDp6iqkrEyHbgKTnmnBlIGBknjg1DpornNiGdZYcKhGDknGKinuZfMyLkuCeFqtDcJDgyMVUjn60w/ZFxmeT1HH/wBaubl5ajdn91z2/ZrE4KlGM4pxcr3klva2/oaa3Txx5WQFep29RVEypNI5DZyM0kYtj9yV8/T/AOtRGLWJiokYluxFVzJdH9xj/Zs3/wAvIf8Agcf8zW01jBHvbIVME84JHqKjLnc0ykkbuMHnFRbkZVDchRgZPakFxEAUXAxweDWXtHfZ/cP+zZr/AJeQ/wDA4/5m7p12PINvIjHA+TB+6KtNewCApKT58fzLnvXORTsj7kchuxq0txMxyFRvc85qfaNbp/cUsun/AM/If+BxLWkazHFNO9y+HbG3OTgc/KK6Ge6kFowKcsuBk85NcYtqTKrBE353AgitBby9dgo2MYzyS+f61ftFvZ/cCy2f/PyH/ga/zOntQxV1JAGfmfOM/SiKzhEu3dKBkEYlYisSK51XO5LeEdsdB/Opv7Q1nbt+zQfUf/ro9ouz+4ay2f8Az8h/4GjoRkEhfl7ZPNQOkSuACfNY7dwasM3WtGRS1vGSDkDPX9aY1xqrzEfZow49Dx/PFHtPJ/cP+zZf8/If+Boua7brf2pgYFwq5zgEqfbn3rhWa60i5eGYMADyMkZ9wa7JH1hQVFrE
c88tn+tUdQsr2+KieyiDKOCpGefxrWnXUdGn9xnPKpS2qQ/8DRjnxBdmzeD7ZP5bnO3d0z15rJmlLruB59zmtpvCtwxx5bAn0dRUZ8LXHTbL9Ay1qsRT7P7jF5TU/wCfkP8AwNHPOQOByetMzgjnj0rbl8PmEkOlwCOuBn+lVW022U4Zpwfdf/rVaxEeif3C/sqf/PyH/gcf8zPZucDpUZPatL7HZf8APWX8v/rUfY7L/nrL+X/1qft12f3MX9lT/wCfsP8AwOP+ZmUorR+y2GcebL+X/wBal+zWPXzpfy/+tR7dfyv7mH9lT/5+0/8AwOP+Zng0prQ+y2R/5ay/l/8AWpDbWIODNLn6f/Wo9uuz+5h/ZU/+ftP/AMDj/mZ1LWh9msR/y2l/L/61BtbLGfOlx9P/AK1L267P7mH9lT/5+0//AAOP+ZnGitD7LY/89pfy/wDrU4Wdmeksv5f/AFqft12f3MP7Kn/z9p/+Bx/zM2kNav2OzyR5smR/n0pwsrQfxyfj/wDqo+sLs/uYf2VP/n7T/wDA4/5mQASKUITWwLC3J4d/8/hTvsECnlpPof8A9VH1hdn9zD+yp/8AP2H/AIHH/MxxEaPJycVtixhAz8xAPftTlsohwNxzS+sLs/uD+yp/8/Yf+Bx/zMQW7dcGniBs88itv7AAeQ/XpT/sm7K/N79KPrC7P7h/2VP/AJ+Q/wDA4/5mOtqT347VIluwfHX2rVFrtA4PtSBUdsqvPQVPt12f3B/ZU/8An5D/AMDj/mUhAUGTyRU8S7WUe569qsm3bHzKxGaUREMfl5rOpV5oNJP7jrwWXuliadSVWFlJN+/Ho/U95+EH/JNdP/673X/pRJXc1w/wh/5Jrp//AF3uv/SiSu4rsh8KPErtOrJru/zCiiiqMgooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAoorL1jxDpuhNbLfyTh7pmSGOC1lndyoycLGrHgc9KANSisW58V6RZ6fa3tzNcRJdsVgiazm86QjOcQ7PM4wT93pz0p8vijRYdFg1hr9GsbhgkMkaM7SscgKqKCxbg/KBng8cGgDXrnfH3/ACTvxL/2C7n/ANFNWrperWOtWQvNPnE0JYoTtKsrA4KspAKkHqCAayvH3/JO/Ev/AGC7n/0U1AHydbIkjBSME9CKtvH5WAVH1qpFlQCQR6U97h5ZcMSQKxauy09LA8bSZYAketC5VMD86s28pgYEdPQ9DUs0MdwC8OEkPJjJ4P0pOVnZisU5GzEhIqNwVYEj6VL5bkKCpBVuQalxvBUrzT5khldG4I6VagtGkXfkeoqmilnZW+XFPjkeMbskBeMZokr7DTtuX45oywCqVYfe3dKdM6h/NTBDdqzDMzMcADNTCUqVQndtH61m6fUrnLJkRh1JGe1XA7i3IBwMcetZ0OSR8vGckmrZlZwQcAnt7VEo30KixFmmkyNwCqMA9DVyzuJrZcr/AB9eOtY7thyoJ4bOKnt7xkj6tuHFU46CT1N62u5Vn+Z8KCOAM9ff863beeGUYDeYMY45/WuM+1yy7hsCqRgnqTU9kbtIf9HbGG6Z561nKJopanaF0SIhI/KVQSMnlqar+TZhpUY5G4qoySSc1grqEj3EcFwy5P3yGycA5rTa7QKWYhiBnDHpUdS7osRzpMGMLBCBzv6A59OtWPk4SRmDqOhPLVnssdzGkjgRHqCpyw/GnXBkUKSfMQdGBww7fjVAaG9CQTG28juRijz43AjyQ1V4rotEpI2kDJULjP8A9alXcSrYRWbJJPYUBcsTWsTQ/MVHYkd/rVcabAzFX24A6etIrOJcEbx6EcD9easK25dxLIo/iB6/5NGqCyZny6DC672hUc9ABxVGbw2gG5Mjd2xmuicoxZXkbAPUjk0TSAIVjPzA4U01NolwTOWPhk7MnG09Rjmnf8I0wXmLJ6DbiumN0W4ZWJA59M/WpwXMe9cA
jgd6r2kifZxOJm0OW2/1ls7ZOPXAqlJpiu4IjIOe6la72XczqrxnbjHPT9ac9nA5IMXGP4e5/Cq9qxeyR53/AGZgdVHtTfsG4EAYAPcYru/7EgJwUdN3PBNMOhQsAA7g547in7UXsjixpahcH+L2qRNNXaRg8etdi+gMDtSYle5C8GoP7Bm2uwkXGPTGaftRezOXFgqLnbuUH0pXsQvLKMAZIrel0m6jUfKGB67TUJ02fA2Qk5+9nHT86PaIORmMtsFlyqkAn6ipfs2WbgH0B71sDSpn2/um+oNTHSLnIfYgA4+n1o50HIzDjTLsNmATkEetSm3AbkcjnPcVtpochPL7T16VYTR0AJZuQe/Sl7RDUGc66PyFyxPQ45pY7cgdCWP610yabbq2WBP+yeKcdKtZMrsG5eQN2KXtEPkOW8nbnIAIPSgRkkysuMnH1rpzpcG7/V9emCaVdLt1XY0fPqe1L2nYfszm3j4Ge/8AnmojGFBOG6muzGnwhcCJMKOuAc1CbeNXO1FAHUYpe0HyHoHwj/5JvY/9fF3/AOlEldvXG/C0Y8B24HQXl5/6Uy12Vdi2OV7hRRRTEFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFct4wXw352myeINVfTpImka1lW7a2OdoVsOpHOG6Z5z3rqaKAPKdH1eSx1jRtc8QXUv9krDqFlaahersJUzRtC8hIGC8aEBiBnA9abYuNPOieIrxXg0U65qFyJJEKrDHMJBFIw/hUknk9PMHrXrFFAHJeCpFvLzxJqttn+zr7UvMtWxhZQsMSNIvqC6Nz3xmq/xHg11vB/iGS11HT49PGmT+ZBJZO8rDy23YkEoAz2+U4967Ws3xFph1rw1qmlCYQm9tJbfzCu7ZvUrnGRnGfWgD4+RvlGWzSlPnyPxr07QPgdda34d03VR4lihF7axXHlf2eW2b1DYz5ozjPXFaY/Z+ux/zNcP/gtP/wAdqbDPH1mdeN2R6GpSxaPfGNpHUA161/wz7d/9DVD/AOC0/wDx2nD9n+8GceK4ef8AqGn/AOO0nG+wXPLLSTz0kWVvm4wTTMNFKFcYIr1Yfs/3gOR4rhB/7Bp/+O08/AS+YAN4rhOOhOmn/wCO1Dpu5V0eQTYEzY7800ErESQD9a9eP7P94WyfFUOf+waf/jtH/DP94c/8VVDz/wBQ0/8Ax2q5WK55Bwse9iMn7oFRISZAa9ib9n67Ck/8JVDwP+gaf/jtZ2gfBG71vw9purDxLFD9utY7jyv7PLbN6hsZ80ZxnrinyiuebpITGQ0ny/3R1qRJo2IBZ8+or1df2f7xeniqH/wWn/47QPgBdhtw8VQZ/wCwaf8A47U+zGpHlDwqsZkRyxHXNSWa+dKGmUke3FerD4C3wBH/AAlcGD/1DT/8dp8fwIv4s7fFcAz/ANQ0/wDx6o9nKxSkjzQQRENvkEaZ4A61AJTE7eUwZB/e6mvTz8BL05/4qmDk5/5Brf8Ax6m/8KBvN2f+Eqgz/wBg0/8Ax2pVCXVlOouh5ibhVYPgLkY2ir1lcF3AyWU9Qeh+tehD4CXo5/4SqAnpzpp/+PVJH8DNQgy6+Krfgd9Nb/49Q6LBVEcUxaRmV52Vf7qDA+lTtk4G4lQO5PFdL4d+FOp6x4f07WF8R2sP2+1iuTEdPZtm9Q2M+aM4zjOBWkfgvqjEk+KLX/wWNx/5Go9jIftInmSardfbXtxGzqDsL5OBW5GzRMEkZ2j5BLcY/wDrV2Nv8F9Tty23xNZncSSG0tuv/f6rL/CTWJB83iaxyO/9lt/8epunJ9AVSPU462nQyMyK4UNjJXrT0uHkGQmEU8bWxmuv/wCFT63s2DxTZgd8aY4z/wCRqif4P6w5yfFNoPTGmNx9P31T7GRftYnOXckYjRnyJMHgHkD+tRpcStkMu3j5cnGTXVf8Kj1jaVPiayOeSTpbZP8A5GpW+EutOAD4osto
/h/stsf+jqfsZB7WJyf2hxmMOxGcHvzUiXUsWIwwZjwCB1rpT8I9YQmRfE9mCB0GmN/8eql4e+HWs6z4f03WF8Q2UJvbWK4EZ01m8veobbnzhnGeuKXsZB7WJlBytuzMckHkk4FDShkIJwQuetdP/wAKn1okk+J7E7jkg6W3P/kal/4VTrec/wDCUWX0/stv/j1HsJB7WJzCXjMWwSrE8Engj39qtpLIsYd5FlB6EdK3X+FetSYz4lsAQMZGlNz/AOR6YfhPre0KPFNmFHIA0tv/AI9S9jIPbRMXz1lJCEhsZ2561XurryI2kb7gOD82cV0q/CzXFXb/AMJPY/X+y2z/AOj6a3wo1l/v+JbBuc86W/8A8fo9hMPbROa+1iRRksAcED0qeBFZjudWwM4A4rdT4T6yhyPE1ln1/st//j9Sf8Kv1xIz/wAVLp/HOf7KbP8A6Pp+xkHtomIZ8SEgnC/wgcGmtO4YDyuD1bIHFXtA8Ca5rXh3TNVXxBYW4vbWO4EQ0x22b1Dbc+cM4z1wK0f+FYa6f+ZnsP8AwVv/APH6XsJj9tA5vzgNwyc9TzUDXjxFvkG0gEMW4PtXVf8ACrtcxj/hJrD/AMFb/wDx+kHwt1sDB8S6eR6HSn/+P0exmHtoHLpdpNNlnXG3gLTxcDB+VlI6HIIx6V0jfCrWmOf+Ek08H20p/wD4/TR8KNaB/wCRmsf/AAVv/wDH6PYSF7aJz7OOCGwV5JNJHcbm+fggHGDj866RvhbrjZB8T2OD/wBQt/8A4/SD4V62BgeJrD/wVv8A/H6PYSD2sTmvtRVdyEEdeR3pHlHlb9yg9xjBro3+FOtbWJ8TWOevGlsP/a1UdA+H2ta34d03Vv7fsIvttrHcCM6azbN6hsZ84Zxnrij2Ew9rE7H4WHPgK2Prd3nX/r5lrs6w/CGgSeGPDcGlTXa3ckck0jTLF5YYySM/3cnGN2OvatyutaI5nuFFFFMQUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABVPVNUstGsXvNQnWG3UhdxBYlicBQBksSTgAAk1crk/HLC2GgalMD9hsNVSe7YAkRoY5EDn2VnUk9uvagDb0XVtM1axMmlyBoIW8lo/KaJomAHyMjAFSARwQOCK0a5DwlcRan4l8TaxYsJdNupLeOCdfuTPHHh2U/xDlVyODtPpXX0AFFFFABRRRQAVl654h0vw3Z/a9VuHhg5+ZYXkwAMk4QE4A5zWpXKfEjU7HTvh/rq3t5DbtdafcQQCVwvmSGJsKuepPpQBr6l4h0vSbW3uLy4ZUuTiBI4nkklOM/KiAseOTgcVD/wkuhWvh+31RLyIabIVjgMUbNuOcBFRRuLZBG0DIweOK5M6/pC614X8QNqFvJo0Vjc2T3iuGignbyWAZhwpIRhz9O9Zto4szpXiO6V4tEPiG9uvMdCFjilSRI5mB+6pY5yenmA0Ael6Vq9jrdn9r0+cTQhzG2VKMjDgqysAVI9CAau1yXgyRL7U/E2rWnOm31+jWsgGFl2QRo8i+oLKRnvtzXW0AFFFFABRRRQBm6tr2n6IYVvZJg8+7y0htpJmbbjJ2xqxwMjn3qKTxRosehxayb+NrCUhI5EVmLsTgKqgFi2cjaBnIPHFUPGXjGz8J2tskk1st/fMY7RbqURRZGNzu56IuQT3OQBya5krpul6X4U1SHVIdT0u01We41C/hIaPzZY5gZTtyFUSyY/2QR6ZoA7K28S6F/YEmqQ3kUWm2pMchaNo/JIwNhQgMrcgbcZ5HHIqzpGu6drscz6fOzmF9ksckTxSRkjIDI4DDIORkc15xeOL46x4jtFebRBr9hdGSNCVlihWNZJVA+8oYA5HXyye1dP4cvLfWfHOt6xpcqzaY1la232iPmOaZWlZtp6NtV1BI9cdqAOxooooAKKKKACs/VdastGSNrw3H70kIsFrLOxx14jVjWh
WH4m1aw06ySC88QLoklxnyro+WD8pGQPMUr3AwRnnigBJfGOgxaba3/2/wAyC6do4BDC8kkjLncBGql8jByMcY5xTz4l0K00C11NLyIadNtjtzDGzbz0CIigsW4I2gZGDxxXnfhu8g0TVdL1jVX8vSiupW8epzqyLNI88cgnfd9wyBXweAccYBAqaxcaedE8RXivBop1zULkSSIVWGOYSCKRh/CpJPJ6eYPWgD0vS9WsdasheafOJoSxQnaVZWBwVZSAVIPUEA1drkvBUi3l54k1W2z/AGdfal5lq2MLKFhiRpF9QXRue+M11tABRRRQAUUUUAIzBVLMQABkk9qwovGnh640R9Yi1FXsFn+ziQRPl5M42qu3c5OeNoOe2aZ4wsdX1LTIbPTILaeGSYfbYZrloPNhAOUDhGxuOAeOVyOM5rzvSri9tLmHU9Wsbe00vT/FF09w0M5lWIvFLHub5FwiOyDd/tZwuKAPS7bxLoX9gSapDeRRabakxyFo2j8kjA2FCAytyBtxnkccirOka7p2uxzPp87OYX2SxyRPFJGSMgMjgMMg5GRzXnF44vjrHiO0V5tEGv2F0ZI0JWWKFY1klUD7yhgDkdfLJ7V0/hy8t9Z8c63rGlyrNpjWVrbfaI+Y5plaVm2no21XUEj1x2oA7GiiigAooooAKy9Y8Q6boTWy38k4e6ZkhjgtZZ3cqMnCxqx4HPStSuW8YL4b87TZPEGqvp0kTSNayrdtbHO0K2HUjnDdM8570AX7nxXpFnp9reXM1xGl2xWCJrObzpCM5xDs8zjBP3enPSg+JdCtNAtdTS8iGnTbY7cwxs289AiIoLFuCNoGRg8cVwej6vJY6xo2ueILqX+yVh1CytNQvV2EqZo2heQkDBeNCAxAzgetNsXGnnRPEV4rwaKdc1C5EkiFVhjmEgikYfwqSTyenmD1oA9L0vVrHWrIXmnziaEsUJ2lWVgcFWUgFSD1BANXa5LwVIt5eeJNVts/2dfal5lq2MLKFhiRpF9QXRue+M11tABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFABRRRQAUUUUAFFFFAH/2Q==", - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from IPython.display import Image\n", - "import os\n", - "\n", - "Image(filename = os.path.join(\"generated_dataset/bboxes_visualization\", \"bbox_70.jpg\"))" - ] - }, - { - "cell_type": "markdown", - "id": "64fe2dc9", - "metadata": { - "id": "64fe2dc9" - }, - "source": [ - "## Convert the dataset to YOLO format" - ] - }, - { - "cell_type": "code", - 
"execution_count": 1, - "id": "3dd01a6a", - "metadata": { - "id": "3dd01a6a" - }, - "outputs": [], - "source": [ - "from datadreamer.utils.convert_dataset import convert_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9b9bb74d", - "metadata": { - "id": "9b9bb74d" - }, - "outputs": [], - "source": [ - "convert_dataset(input_dir=\"generated_dataset\", output_dir=\"generated_dataset_yolo\", dataset_format=\"yolo\", split_ratios=[0.8, 0.1, 0.1], copy_files=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a167a842", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "a167a842", - "outputId": "6f272b02-5b41-4f4c-cd41-2ed37e461e58" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "data.yaml train val\n" - ] - } - ], - "source": [ - "!ls generated_dataset_yolo" - ] - }, - { - "cell_type": "markdown", - "id": "d2d660b0", - "metadata": { - "id": "d2d660b0" - }, - "source": [ - "# Train your model (YOLOv8 as an example)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "982e475e", - "metadata": { - "id": "982e475e", - "scrolled": true - }, - "outputs": [], - "source": [ - "!pip install ultralytics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "184cf0fa", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "184cf0fa", - "outputId": "6d5837d1-cbc1-4460-f9ec-93ec290c7fc5" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 6.23M/6.23M [00:00<00:00, 327MB/s]\n" - ] - } - ], - "source": [ - "from ultralytics import YOLO\n", - "model = YOLO(\"yolov8n.pt\") # load a pretrained model" - ] + "cells": [ + { + "cell_type": "markdown", + "id": 
"8ce1517f-7258-406d-9139-9adadb1a1570", + "metadata": {}, + "source": [ + "\n", + "\n", + "# DataDreamer Tutorial: Generating a dataset for object detection, training a model, and deploying it to the OAK (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5_2ivH03etO", + "metadata": { + "id": "b5_2ivH03etO" + }, + "outputs": [], + "source": [ + "!pip install datadreamer" + ] + }, + { + "cell_type": "markdown", + "id": "c3704c07", + "metadata": { + "id": "c3704c07" + }, + "source": [ + "## Generate a dataset with your own classes (might take some time to download all models)" + ] + }, + { + "cell_type": "markdown", + "id": "M4v-QieP4tXL", + "metadata": { + "id": "M4v-QieP4tXL" + }, + "source": [ + "Make sure you are using the GPU runtime type (in Google Colab).\n", + "\n", + "~8 min to generate 100 images\n", + "\n", + "~2 min to annotate them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab1e2f9", + "metadata": { + "id": "6ab1e2f9", + "scrolled": true + }, + "outputs": [], + "source": [ + "!datadreamer --save_dir generated_dataset \\\n", + " --class_names robot tractor horse car person bear \\\n", + " --prompts_number 100 \\\n", + " --prompt_generator simple \\\n", + " --num_objects_range 2 3 \\\n", + " --image_generator sdxl-turbo \\\n", + " --use_tta \\\n", + " --image_annotator owlv2 \\\n", + " --conf_threshold 0.15 \\\n", + " --seed 42" + ] + }, + { + "cell_type": "markdown", + "id": "7a10755e", + "metadata": {}, + "source": [ + "### Parameters\n", + "- `--save_dir` (required): Path to the directory for saving generated images and annotations.\n", + "- `--class_names` (required): Space-separated list of object names for image generation and annotation. Example: `person moon robot`.\n", + "- `--prompts_number` (optional): Number of prompts to generate for each object. 
Defaults to `10`.\n", + "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", + "- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`.\n", + "- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", + "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", + "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", + "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", + "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", + "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", + "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", + "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", + "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `\", hd, 8k, highly detailed\"`.\n", + "- `--negative_prompt`: Negative prompts to guide the generation away from certain features. 
Default is `\"cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy\"`.\n", + "- `--use_tta`: Toggle test time augmentation for object detection. Default is `False`.\n", + "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", + "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", + "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", + "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", + "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", + "- `--disable_lm_filter`: Use only a bad word list for profanity filtering. Default is `False`.\n", + "- `--keep_unlabeled_images`: Whether to keep images without any annotations. Default is `False`.\n", + "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", + "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", + "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", + "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", + "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n", + "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7add74d9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 497 }, + "id": "7add74d9", + "outputId": "a5389937-2a4d-448b-e2f2-6be98018d9be" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "bb4e6754", - "metadata": { - "id": "bb4e6754", - "scrolled": true - }, - "outputs": [], - "source": [ - "results = model.train(data = \"generated_dataset_yolo/data.yaml\", epochs=50)" + "data": { + "image/jpeg": "", + "text/plain": [ + "" ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "from IPython.display import Image\n", + "\n", + "Image(filename=os.path.join(\"generated_dataset/bboxes_visualization\", \"bbox_70.jpg\"))" + ] + }, + { + "cell_type": "markdown", + "id": "64fe2dc9", + "metadata": { + "id": "64fe2dc9" + }, + "source": [ + "## Convert the dataset to YOLO format" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3dd01a6a", + "metadata": { + "id": "3dd01a6a" + }, + "outputs": [], + "source": [ + "from datadreamer.utils.convert_dataset import convert_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9b9bb74d", + "metadata": { + "id": "9b9bb74d" + }, + "outputs": [], + "source": [ + "convert_dataset(\n", + " input_dir=\"generated_dataset\",\n", + " output_dir=\"generated_dataset_yolo\",\n", + " dataset_format=\"yolo\",\n", + " split_ratios=[0.8, 0.1, 0.1],\n", + " copy_files=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a167a842", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "a167a842", + "outputId": "6f272b02-5b41-4f4c-cd41-2ed37e461e58" + }, + "outputs": [ { - "cell_type": "markdown", - "id": "d8b05e33", - 
"metadata": { - "id": "d8b05e33" - }, - "source": [ - "## Show the predictions" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "data.yaml train val\n" + ] + } + ], + "source": [ + "!ls generated_dataset_yolo" + ] + }, + { + "cell_type": "markdown", + "id": "d2d660b0", + "metadata": { + "id": "d2d660b0" + }, + "source": [ + "# Train your model (YOLOv8 as an example)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "982e475e", + "metadata": { + "id": "982e475e", + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install ultralytics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "184cf0fa", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "184cf0fa", + "outputId": "6d5837d1-cbc1-4460-f9ec-93ec290c7fc5" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "id": "b559b1f9", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "b559b1f9", - "outputId": "37ab5dd6-ecf6-4fb5-86b0-dae0b092c14c" - }, - "outputs": [ - { - "data": { - "image/jpeg": "", - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Image(filename = os.path.join(results.save_dir, \"val_batch0_pred.jpg\"))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...\n" + ] }, { - "cell_type": "code", - "execution_count": null, - "id": "dec0cb11", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dec0cb11", - "outputId": "677a9ba3-0386-4b77-dd53-53d9407119e5" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ultralytics YOLOv8.0.225 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", - "Model summary (fused): 168 layers, 3006818 parameters, 0 gradients, 8.1 
GFLOPs\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/generated_dataset_yolo/val/labels.cache... 21 images, 0 backgrounds, 0 corrupt: 100%|██████████| 21/21 [00:00" ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(filename=os.path.join(results.save_dir, \"val_batch0_pred.jpg\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dec0cb11", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dec0cb11", + "outputId": "677a9ba3-0386-4b77-dd53-53d9407119e5" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Generating the .blob file using [tools.luxonis.com](http://tools.luxonis.com)\n", - "After the training and validation, you can convert the fine-tuned PyTorch model to a `.blob` format.\n", - "\n", - "Please follow these steps to do so:\n", - "1. Download the fine-tuned weights from `runs/detect/train/weights/best.pt` to your device (as shown in screenshot below)\n", - "2. Go to the page [tools.luxonis.com](http://tools.luxonis.com)\n", - "3. On the page set Yolo Version to `YoloV8 (detection only)` ( (as shown in the screeenshot below)\n", - "4. On the page set File to the downloaded `best` weights (as shown in the screeenshot below)\n", - "5. On the page set Input shape to `640` (as shown in the screeenshot below)\n" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Ultralytics YOLOv8.0.225 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)\n", + "Model summary (fused): 168 layers, 3006818 parameters, 0 gradients, 8.1 GFLOPs\n" + ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![image.png]()" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/generated_dataset_yolo/val/labels.cache... 
21 images, 0 backgrounds, 0 corrupt: 100%|██████████| 21/21 [00:00\n", + "\n", + "# DataDreamer Tutorial: Generating a dataset for instance segmentation, training a model, and deploying it to the OAK (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5_2ivH03etO", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b5_2ivH03etO", + "outputId": "c92b1e2e-cd3e-4a7d-8be6-776e0dfad5bc" + }, + "outputs": [], + "source": [ + "!pip install -q datadreamer" + ] + }, + { + "cell_type": "markdown", + "id": "c3704c07", + "metadata": { + "id": "c3704c07" + }, + "source": [ + "## Generate a dataset with your own classes (might take some time to download all models)" + ] + }, + { + "cell_type": "markdown", + "id": "M4v-QieP4tXL", + "metadata": { + "id": "M4v-QieP4tXL" + }, + "source": [ + "Make sure you are using the GPU runtime type (in Google Colab).\n", + "\n", + "~4 min to generate 30 images\n", + "\n", + "~43 secs to annotate them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab1e2f9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6ab1e2f9", + "outputId": "6f57eb7a-f261-46bc-e574-3631cade8660", + "scrolled": true + }, + "outputs": [], + "source": [ + "!datadreamer --save_dir generated_dataset \\\n", + " --class_names tractor horse bear \\\n", + " --prompts_number 30 \\\n", + " --prompt_generator simple \\\n", + " --num_objects_range 1 1 \\\n", + " --image_generator sdxl-turbo \\\n", + " --task instance-segmentation \\\n", + " --disable_lm_filter \\\n", + " --annotator_size base \\\n", + " --use_tta \\\n", + " --image_annotator owlv2-slimsam \\\n", + " --conf_threshold 0.2 \\\n", + " --seed 42" + ] + }, + { + "cell_type": "markdown", + "id": "7a10755e", + "metadata": { + "id": "7a10755e" + }, + "source": [ + "### Parameters\n", + "- `--save_dir` (required): Path to the directory for saving generated images and annotations.\n", + "- `--class_names` 
(required): Space-separated list of object names for image generation and annotation. Example: `person moon robot`.\n", + "- `--prompts_number` (optional): Number of prompts to generate for each object. Defaults to `10`.\n", + "- `--annotate_only` (optional): Only annotate the images without generating new ones, prompt and image generator will be skipped. Defaults to `False`.\n", + "- `--task`: Choose between detection, classification and instance segmentation. Default is `detection`.\n", + "- `--dataset_format`: Format of the dataset. Defaults to `raw`. Supported values: `raw`, `yolo`, `coco`, `luxonis-dataset`, `cls-single`.\n", + "- `--split_ratios`: Split ratios for train, validation, and test sets. Defaults to `[0.8, 0.1, 0.1]`.\n", + "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", + "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", + "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo` or `sdxl-lightning`. Default is `sdxl-turbo`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", + "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", + "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", + "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", + "- `--prompt_suffix`: Suffix to add to every image generation prompt, e.g., for adding details like resolution. Default is `\", hd, 8k, highly detailed\"`.\n", + "- `--negative_prompt`: Negative prompts to guide the generation away from certain features. 
Default is `\"cartoon, blue skin, painting, scrispture, golden, illustration, worst quality, low quality, normal quality:2, unrealistic dream, low resolution, static, sd character, low quality, low resolution, greyscale, monochrome, nose, cropped, lowres, jpeg artifacts, deformed iris, deformed pupils, bad eyes, semi-realistic worst quality, bad lips, deformed mouth, deformed face, deformed fingers, bad anatomy\"`.\n", + "- `--use_tta`: Toggle test time augmentation for object detection. Default is `False`.\n", + "- `--synonym_generator`: Enhance class names with synonyms. Default is `none`. Other options are `llm`, `wordnet`.\n", + "- `--use_image_tester`: Use image tester for image generation. Default is `False`.\n", + "- `--image_tester_patience`: Patience level for image tester. Default is `1`.\n", + "- `--lm_quantization`: Quantization to use for Mistral language model. Choose between `none` and `4bit`. Default is `none`.\n", + "- `--annotator_size`: Size of the annotator model to use. Choose between `base` and `large`. Default is `base`.\n", + "- `--disable_lm_filter`: Use only a bad word list for profanity filtering. Default is `False`.\n", + "- `--keep_unlabeled_images`: Whether to keep images without any annotations. Default is `False`.\n", + "- `--batch_size_prompt`: Batch size for prompt generation. Default is 64.\n", + "- `--batch_size_annotation`: Batch size for annotation. Default is `1`.\n", + "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", + "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", + "- `--seed`: Set a random seed for image and prompt generation. 
Default is `42`.\n", + "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7add74d9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 497 + }, + "id": "7add74d9", + "outputId": "cafd066a-b524-4006-e2d0-cd949d65c567" + }, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from IPython.display import Image\n", + "\n", + "Image(filename=os.path.join(\"generated_dataset/bboxes_visualization\", \"bbox_5.jpg\"))" + ] + }, + { + "cell_type": "markdown", + "id": "64fe2dc9", + "metadata": { + "id": "64fe2dc9" + }, + "source": [ + "## Convert the dataset to YOLO format" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3dd01a6a", + "metadata": { + "id": "3dd01a6a" + }, + "outputs": [], + "source": [ + "from datadreamer.utils.convert_dataset import convert_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9b9bb74d", + "metadata": { + "id": "9b9bb74d" + }, + "outputs": [], + "source": [ + "convert_dataset(\n", + " input_dir=\"generated_dataset\",\n", + " output_dir=\"generated_dataset_yolo\",\n", + " dataset_format=\"yolo\",\n", + " split_ratios=[0.8, 0.1, 0.1],\n", + " copy_files=True,\n", + " is_instance_segmentation=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a167a842", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a167a842", + "outputId": "715988c2-ab27-4ce2-b12c-2fa01188c537" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "data.yaml test train\tval\n" + ] + } + ], + "source": [ + "!ls generated_dataset_yolo" + ] + }, + { + "cell_type": "markdown", + "id": "d2d660b0", + "metadata": { + "id": "d2d660b0" + }, + "source": 
[ + "# Train your model (YOLOv8 as an example)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "982e475e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "982e475e", + "outputId": "1f4cb9f5-1d01-4882-a730-434e5122546f", + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install -q ultralytics" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "184cf0fa", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "184cf0fa", + "outputId": "dcc43a26-bc78-4d3d-ddb3-6932a8584df9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating new Ultralytics Settings v0.0.6 file ✅ \n", + "View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'\n", + "Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.\n", + "Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt to 'yolov8n-seg.pt'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6.74M/6.74M [00:00<00:00, 110MB/s]\n" + ] + } + ], + "source": [ + "from ultralytics import YOLO\n", + "\n", + "model = YOLO(\"yolov8n-seg.pt\") # load a pretrained model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bb4e6754", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "bb4e6754", + "outputId": "66b28d5a-6544-46fa-ee73-3074f141e981", + "scrolled": true + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['WANDB_DISABLED'] = 'true'\n", + "\n", + "results = model.train(data=\"generated_dataset_yolo/data.yaml\", epochs=200)" + ] + }, + { + "cell_type": "markdown", + "id": "d8b05e33", + "metadata": { + "id": "d8b05e33" + }, + "source": [ + "## Show the predictions" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 10, + "id": "b559b1f9", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "b559b1f9", + "outputId": "bcb3fae6-27eb-4384-f532-c573ae45c599" + }, + "outputs": [ + { + "data": { + "image/jpeg": "", + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Image(filename=os.path.join(results.save_dir, \"val_batch0_pred.jpg\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dec0cb11", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dec0cb11", + "outputId": "72cf4330-fa0f-47aa-82c5-242dc6978dcd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ultralytics 8.3.21 🚀 Python-3.10.12 torch-2.5.0+cu121 CUDA:0 (Tesla T4, 15102MiB)\n", + "YOLOv8n-seg summary (fused): 195 layers, 3,258,649 parameters, 0 gradients, 12.0 GFLOPs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mval: \u001b[0mScanning /content/generated_dataset_yolo/val/labels.cache... 3 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3/3 [00:00 - + coverage coverage - 55% - 55% + 75% + 75% diff --git a/pyproject.toml b/pyproject.toml index d7aae02..2468bdf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "datadreamer" -version = "0.1.5" +version = "0.2.0" description = "A library for dataset generation and knowledge extraction from foundation computer vision models." 
readme = "README.md" requires-python = ">=3.8" diff --git a/requirements.txt b/requirements.txt index efbf9b5..e0f23a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ torch>=2.0.0 torchvision>=0.16.0 -transformers>=4.0.0 -diffusers>=0.24.0 +transformers>=4.45.2 +diffusers>=0.31.0 compel>=2.0.0 tqdm>=4.0.0 Pillow>=9.0.0 @@ -12,6 +12,6 @@ accelerate>=0.25.0 scipy>=1.10.0 bitsandbytes>=0.42.0 nltk>=3.8.1 -luxonis-ml[all]>=0.1.0 +luxonis-ml[all]>=0.5.0 python-box>=7.1.1 -gcsfs>=2023.1.0 +gcsfs>=2023.1.0 \ No newline at end of file diff --git a/tests/integration/sample_config.yaml b/tests/core_tests/integration/sample_config.yaml similarity index 74% rename from tests/integration/sample_config.yaml rename to tests/core_tests/integration/sample_config.yaml index 0feb485..c8fed56 100644 --- a/tests/integration/sample_config.yaml +++ b/tests/core_tests/integration/sample_config.yaml @@ -1,7 +1,7 @@ -class_names: [alien, mars, cat] +class_names: [horse, bear, cat] prompts_number: 1 prompt_generator: simple num_objects_range: [2, 3] image_generator: sdxl-turbo conf_threshold: 0.15 -seed: 43 \ No newline at end of file +seed: 42 \ No newline at end of file diff --git a/tests/core_tests/integration/test_pipeline.py b/tests/core_tests/integration/test_pipeline.py new file mode 100644 index 0000000..a49617c --- /dev/null +++ b/tests/core_tests/integration/test_pipeline.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +import os +import subprocess + +import psutil +import pytest +import torch + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_detection_pipeline(cmd: str, target_folder: str): + # Run the command + result = subprocess.run(cmd, shell=True) + assert result.returncode == 0, "Command failed to run" + # Check that the target folder is a folder + assert os.path.isdir(target_folder), "Directory 
not created" + files = [ + "annotations.json", + "generation_args.yaml", + "prompts.json", + ] + # Check that all the files were created + for file in files: + assert os.path.isfile(os.path.join(target_folder, file)), f"{file} not created" + # Check that an image with an unique was created + assert ( + len( + list( + filter( + lambda x: "image_" in x and ".jpg" in x, os.listdir(target_folder) + ) + ) + ) + > 0 + ), "Images not created" + # Check that the "bboxes_visualization" folder was created + assert os.path.isdir( + os.path.join(target_folder, "bboxes_visualization") + ), "bboxes_visualization directory not created" + + +# ========================================================= +# DETECTION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien mars cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + 
f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# TEST WITH CONFIG FILE +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir 
{target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): + # Define target folder + target_folder = "data/data-cls-cuda-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator clip " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_config_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl-turbo-config/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_config_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-sdxl-turbo-config/" + # Define the command to run the 
datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--config ./tests/core_tests/integration/sample_config.yaml " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) diff --git a/tests/core_tests/unittests/test_annotators.py b/tests/core_tests/unittests/test_annotators.py new file mode 100644 index 0000000..4e78df2 --- /dev/null +++ b/tests/core_tests/unittests/test_annotators.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +import numpy as np +import psutil +import pytest +import requests +import torch +from PIL import Image + +from datadreamer.dataset_annotation.clip_annotator import CLIPAnnotator +from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator +from datadreamer.dataset_annotation.slimsam_annotator import SlimSAMAnnotator + +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_owlv2_annotator(device: str, size: str = "base"): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = OWLv2Annotator(device=device, size=size) + final_boxes, final_scores, final_labels = annotator.annotate_batch( + [im], ["bus", "people"] + ) + # Assert that the boxes, scores and labels are tensors + assert isinstance(final_boxes, list) and len(final_boxes) == 1 + assert isinstance(final_scores, list) and len(final_scores) == 1 + assert isinstance(final_labels, list) and len(final_labels) == 1 + # Get the number of objects detected + num_objects = final_boxes[0].shape[0] + # Check that the boxes has correct shape + assert final_boxes[0].shape == (num_objects, 4) + # Check that the scores has correct shape + assert final_scores[0].shape == (num_objects,) + # Check that the labels has correct shape + assert final_labels[0].shape == (num_objects,) + # Check that the 
scores are not zero + assert np.all(final_scores[0] > 0) + # Check that the labels are bigger or equal to zero + assert np.all(final_labels[0] >= 0) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_owlv2_annotator(): + _check_owlv2_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_owlv2_annotator(): + _check_owlv2_annotator("cpu") + + +def _check_clip_annotator(device: str, size: str = "base"): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = CLIPAnnotator(device=device, size=size) + labels = annotator.annotate_batch([im], ["bus", "people"]) + # Check that the labels are lists + assert isinstance(labels, list) and len(labels) == 1 + # Check that the labels are ndarray of integers + assert isinstance(labels[0], np.ndarray) and labels[0].dtype == np.int64 + + annotator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_clip_base_annotator(): + _check_clip_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_clip_base_annotator(): + _check_clip_annotator("cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_clip_large_annotator(): + _check_clip_annotator("cuda", size="large") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_clip_large_annotator(): + _check_clip_annotator("cpu", size="large") + + +def _check_slimsam_annotator(device: str, size: str = "base"): + url = "https://ultralytics.com/images/bus.jpg" + im = 
Image.open(requests.get(url, stream=True).raw) + annotator = SlimSAMAnnotator(device=device, size=size) + masks = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + w, h = im.width, im.height + # Check that the masks are lists + assert isinstance(masks, list) and len(masks) == 1 + # Check that the masks are [B, O, N, 2], where + # - B = batch size + # - O = number of objects + # - N = number of points of the mask segment polygon (at least 3 to be polygon) + assert isinstance(masks[0], list) and len(masks[0]) == 1 + assert isinstance(masks[0][0], list) and len(masks[0][0]) >= 3 + for point in masks[0][0]: + # Check that it is a 2D point + assert len(point) == 2 + assert 0 <= point[0] <= w and 0 <= point[1] <= h + + annotator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_slimsam_base_annotator(): + _check_slimsam_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_slimsam_base_annotator(): + _check_slimsam_annotator("cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_slimsam_large_annotator(): + _check_slimsam_annotator("cuda", size="large") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_slimsam_large_annotator(): + _check_slimsam_annotator("cpu", size="large") diff --git a/tests/unittests/test_converters.py b/tests/core_tests/unittests/test_converters.py similarity index 89% rename from tests/unittests/test_converters.py rename to tests/core_tests/unittests/test_converters.py index f56ba71..776baea 100644 --- a/tests/unittests/test_converters.py +++ b/tests/core_tests/unittests/test_converters.py @@ -3,11 +3,13 @@ import shutil import unittest 
+from luxonis_ml.data import LuxonisDataset from PIL import Image from datadreamer.utils import ( BaseConverter, COCOConverter, + LuxonisDatasetConverter, SingleLabelClsConverter, YOLOConverter, ) @@ -248,6 +250,46 @@ def test_create_data_yaml(self): self.assertIn("names: ['cat', 'dog']", content) +class TestLuxonisDatasetConverter(unittest.TestCase): + def setUp(self): + self.test_dir = "test_dataset" + os.makedirs(self.test_dir, exist_ok=True) + + # Create sample images + self.image_size = (100, 100) + self.create_sample_image("0.jpg") + self.create_sample_image("1.jpg") + + # Create sample labels + self.labels = { + "class_names": ["cat", "dog"], + "0.jpg": {"boxes": [(10, 10, 50, 50)], "labels": [0]}, + "1.jpg": {"boxes": [(20, 20, 70, 70)], "labels": [1]}, + } + with open(os.path.join(self.test_dir, "annotations.json"), "w") as f: + json.dump(self.labels, f) + + def tearDown(self): + shutil.rmtree(self.test_dir) + # dataset_name is only set by test_convert, so look the attribute up by its name + if hasattr(self, "dataset_name") and LuxonisDataset.exists( + self.dataset_name + ): + dataset = LuxonisDataset(self.dataset_name) + dataset.delete_dataset() + + def create_sample_image(self, filename): + image = Image.new("RGB", self.image_size, color="white") + image.save(os.path.join(self.test_dir, filename)) + + def test_convert(self): + self.dataset_name = "test_dataset" + converter = LuxonisDatasetConverter(dataset_name=self.dataset_name) + split_ratios = [1, 0, 0] + converter.convert(self.test_dir, self.dataset_name, split_ratios) + self.assertTrue(LuxonisDataset.exists(self.dataset_name)) + + class TestSingleLabelClsConverter(unittest.TestCase): def setUp(self): self.converter = SingleLabelClsConverter() diff --git a/tests/unittests/test_image_generation.py b/tests/core_tests/unittests/test_image_generation.py similarity index 67% rename from tests/unittests/test_image_generation.py rename to tests/core_tests/unittests/test_image_generation.py index f91fcc1..6ff15ef 100644 --- a/tests/unittests/test_image_generation.py +++ 
b/tests/core_tests/unittests/test_image_generation.py @@ -25,6 +25,8 @@ def _check_clip_image_tester(device: str): url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) tester = ClipImageTester(device=device) + # Check that the tester is not None + assert tester is not None passed, probs, num_passed = tester.test_image(im, ["bus"]) # Check that the image passed the test assert passed is True @@ -34,21 +36,29 @@ def _check_clip_image_tester(device: str): assert probs.shape == (1, 1) # Check that the probability is not zero assert probs[0, 0] > 0 - # Release the tester + passed_list, probs_list, num_passed_list = tester.test_images_batch([im], [["bus"]]) + # Check that the image passed the test + assert passed_list[0] is True + # Check that the number of objects passed is correct + assert num_passed_list[0] == 1 + # Check that the probability has correct shape + assert len(probs_list) == 1 + # Check that the probability is not zero + assert probs_list[0][0] > 0 tester.release(empty_cuda_cache=True if device != "cpu" else False) @pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", ) def test_cuda_clip_image_tester(): _check_clip_image_tester("cuda") @pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", ) def test_cpu_clip_image_tester(): _check_clip_image_tester("cpu") @@ -65,6 +75,8 @@ def _check_image_generator( device: str, ): image_generator = image_generator_class(device=device) + # Check that the image generator is not None + assert image_generator is not None # Generate images and check each of them for generated_images_batch in image_generator.generate_images( ["A photo of a cat, dog"], [["cat", "dog"]] @@ -72,24 +84,17 @@ def 
_check_image_generator( generated_image = generated_images_batch[0] assert generated_image is not None assert isinstance(generated_image, Image.Image) - # Release the generator - image_generator.release(empty_cuda_cache=True if device != "cpu" else False) + images = image_generator.generate_images_batch( + ["A photo of a cat, dog"], + "blurry, bad quality", + ) + assert len(images) == 1 + assert images[0] is not None + assert isinstance(images[0], Image.Image) -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", -) -def test_cuda_sdxl_image_generator(): - _check_image_generator(StableDiffusionImageGenerator, "cuda") - - -@pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", -) -def test_cpu_sdxl_image_generator(): - _check_image_generator(StableDiffusionImageGenerator, "cpu") + # Release the generator + image_generator.release(empty_cuda_cache=True if device != "cpu" else False) @pytest.mark.skipif( @@ -106,19 +111,3 @@ def test_cuda_sdxl_turbo_image_generator(): ) def test_cpu_sdxl_turbo_image_generator(): _check_image_generator(StableDiffusionTurboImageGenerator, "cpu") - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, - reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", -) -def test_cuda_sdxl_lightning_image_generator(): - _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") - - -@pytest.mark.skipif( - total_memory < 16 or total_disk_space < 25, - reason="Test requires at least 16GB of RAM and 25GB of HDD", -) -def test_cpu_sdxl_lightning_image_generator(): - _check_image_generator(StableDiffusionLightningImageGenerator, "cpu") diff --git a/tests/core_tests/unittests/test_pipeline_arguments.py b/tests/core_tests/unittests/test_pipeline_arguments.py new file mode 100644 index 
0000000..f435da7 --- /dev/null +++ b/tests/core_tests/unittests/test_pipeline_arguments.py @@ -0,0 +1,213 @@ +from __future__ import annotations + +import subprocess + +import pytest + + +def _check_wrong_argument_choice(cmd: str): + with pytest.raises(subprocess.CalledProcessError): + subprocess.check_call(cmd, shell=True) + + +def _check_wrong_value(cmd: str): + with pytest.raises(ValueError): + try: + subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + raise ValueError(e.output.decode()) from e + + +# ========================================================= +# ARGUMENTS CHECKS +# ========================================================= +def test_invalid_task_value(): + # Define the cmd + cmd = "datadreamer --task invalid_task" + _check_wrong_argument_choice(cmd) + + +def test_invalid_prompts_number_type(): + # Define the cmd + cmd = "datadreamer --prompts_number value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_num_objects_range_type(): + # Define the cmd + cmd = "datadreamer --num_objects_range value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_conf_threshold_range_type(): + # Define the cmd + cmd = "datadreamer --conf_threshold value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_tester_patience_type(): + # Define the cmd + cmd = "datadreamer --image_tester_patience value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_seed_type(): + # Define the cmd + cmd = "datadreamer --seed value --device cpu" + _check_wrong_argument_choice(cmd) + + +def test_invalid_prompt_generator(): + # Define the cmd + cmd = "datadreamer --prompt_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_generator(): + # Define the cmd + cmd = "datadreamer --image_generator invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator 
invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_det_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator clip" + _check_wrong_argument_choice(cmd) + + +def test_invalid_clf_image_annotator(): + # Define the cmd + cmd = "datadreamer --image_annotator owlv2 --task classification" + _check_wrong_argument_choice(cmd) + + +def test_invalid_device(): + # Define the cmd + cmd = "datadreamer --device invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_invalid_annotator_size(): + # Define the cmd + cmd = "datadreamer --annotator_size invalide_value" + _check_wrong_argument_choice(cmd) + + +def test_empty_class_names(): + # Define the cmd + cmd = "datadreamer --class_names []" + _check_wrong_value(cmd) + + +def test_invalid_class_names(): + # Define the cmd + cmd = "datadreamer --class_names [2, -1]" + _check_wrong_value(cmd) + + +def test_invalid_prompts_number(): + # Define the cmd + cmd = "datadreamer --prompts_number -1" + _check_wrong_value(cmd) + + +def test_negative_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold -1" + _check_wrong_value(cmd) + + +def test_big_conf_threshold(): + # Define the cmd + cmd = "datadreamer --conf_threshold 10" + _check_wrong_value(cmd) + + +def test_negative_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold -1" + _check_wrong_value(cmd) + + +def test_big_annotation_iou_threshold(): + # Define the cmd + cmd = "datadreamer --annotation_iou_threshold 10" + _check_wrong_value(cmd) + + +def test_invalid_image_tester_patience(): + # Define the cmd + cmd = "datadreamer --image_tester_patience -1" + _check_wrong_value(cmd) + + +def test_invalid_seed(): + # Define the cmd + cmd = "datadreamer --seed -1 --device cpu" + _check_wrong_value(cmd) + + +def test_invalid_synonym_generator(): + # Define the cmd + cmd = "datadreamer --device cpu --synonym_generator invalid" + _check_wrong_value(cmd) + + +def 
test_invalid_lm_quantization(): + # Define the cmd + cmd = "datadreamer --device cuda --lm_quantization invalid" + _check_wrong_value(cmd) + + +def test_invalid_device_lm_quantization(): + # Define the cmd + cmd = "datadreamer --device cpu --lm_quantization 4bit" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_prompt(): + # Define the cmd + cmd = "datadreamer --batch_size_prompt -1" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_annotation(): + # Define the cmd + cmd = "datadreamer --batch_size_annotation -1" + _check_wrong_value(cmd) + + +def test_invalid_batch_size_image(): + # Define the cmd + cmd = "datadreamer --batch_size_image -1" + _check_wrong_value(cmd) + + +def test_invalid_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1" + _check_wrong_value(cmd) + + +def test_many_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 1 2 3" + _check_wrong_value(cmd) + + +def test_desc_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range 3 1" + _check_wrong_value(cmd) + + +def test_negative_num_objects_range(): + # Define the cmd + cmd = "datadreamer --num_objects_range -3 1" + _check_wrong_value(cmd) diff --git a/tests/unittests/test_prompt_generation.py b/tests/core_tests/unittests/test_prompt_generation.py similarity index 73% rename from tests/unittests/test_prompt_generation.py rename to tests/core_tests/unittests/test_prompt_generation.py index e77472d..b74bc0d 100644 --- a/tests/unittests/test_prompt_generation.py +++ b/tests/core_tests/unittests/test_prompt_generation.py @@ -6,6 +6,10 @@ from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator from datadreamer.prompt_generation.lm_synonym_generator import LMSynonymGenerator +from datadreamer.prompt_generation.profanity_filter import ProfanityFilter +from datadreamer.prompt_generation.qwen2_lm_prompt_generator import ( + Qwen2LMPromptGenerator, +) from
datadreamer.prompt_generation.simple_prompt_generator import SimplePromptGenerator from datadreamer.prompt_generation.tinyllama_lm_prompt_generator import ( TinyLlamaLMPromptGenerator, @@ -68,14 +72,6 @@ def _check_lm_prompt_generator( prompt_generator.release(empty_cuda_cache=True if device != "cpu" else False) -@pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", -) -def test_cuda_lm_prompt_generator(): - _check_lm_prompt_generator("cuda") - - @pytest.mark.skipif( total_memory < 12 or not torch.cuda.is_available() or total_disk_space < 25, reason="Test requires at least 12GB of RAM, 25GB of HDD and CUDA support", @@ -85,11 +81,11 @@ def test_cuda_4bit_lm_prompt_generator(): @pytest.mark.skipif( - total_memory < 32 or total_disk_space < 35, - reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", + total_memory < 12 or total_disk_space < 12, + reason="Test requires at least 12GB of RAM and 12GB of HDD for running on CPU", ) -def test_cpu_lm_prompt_generator(): - _check_lm_prompt_generator("cpu") +def test_cpu_tinyllama_lm_prompt_generator(): + _check_lm_prompt_generator("cpu", TinyLlamaLMPromptGenerator) @pytest.mark.skipif( @@ -104,8 +100,24 @@ def test_cuda_tinyllama_lm_prompt_generator(): total_memory < 12 or total_disk_space < 12, reason="Test requires at least 12GB of RAM and 12GB of HDD for running on CPU", ) -def test_cpu_tinyllama_lm_prompt_generator(): - _check_lm_prompt_generator("cpu", TinyLlamaLMPromptGenerator) +def test_cpu_qwen2_lm_prompt_generator(): + _check_lm_prompt_generator("cpu", Qwen2LMPromptGenerator) + + +@pytest.mark.skipif( + total_memory < 10 or not torch.cuda.is_available() or total_disk_space < 12, + reason="Test requires at least 10GB of RAM, 12GB of HDD and CUDA support", +) +def test_cuda_qwen2_lm_prompt_generator(): + _check_lm_prompt_generator("cuda", Qwen2LMPromptGenerator) + + 
+@pytest.mark.skipif( + total_memory < 10 or not torch.cuda.is_available() or total_disk_space < 12, + reason="Test requires at least 10GB of RAM, 12GB of HDD and CUDA support", +) +def test_cuda_4bit_qwen2_lm_prompt_generator(): + _check_lm_prompt_generator("cuda", Qwen2LMPromptGenerator, quantization="4bit") def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGenerator): @@ -126,29 +138,37 @@ def _check_synonym_generator(device: str, synonym_generator_class=LMSynonymGener generator.release(empty_cuda_cache=True if device != "cpu" else False) -@pytest.mark.skipif( - total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, - reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", -) -def test_cuda_synonym_generator(): - _check_synonym_generator("cuda") +def test_cpu_wordnet_synonym_generator(): + _check_synonym_generator("cpu", WordNetSynonymGenerator) @pytest.mark.skipif( - total_memory < 32 or total_disk_space < 35, - reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", + not torch.cuda.is_available(), + reason="Test requires CUDA support", ) -def test_cpu_synonym_generator(): - _check_synonym_generator("cpu") +def test_cuda_wordnet_synonym_generator(): + _check_synonym_generator("cuda", WordNetSynonymGenerator) -def test_cpu_wordnet_synonym_generator(): - _check_synonym_generator("cpu", WordNetSynonymGenerator) +def _check_profanity_filter(device: str) -> None: + """Check the profanity filter. + + Args: + device (str): The device to run the language model on ('cuda' for GPU, 'cpu' for CPU). 
+ """ + profanity_filter = ProfanityFilter(device=device, use_lm=True) + assert profanity_filter.is_safe(["cat", "dog", "plane", "person"]) + assert not profanity_filter.is_safe(["cat", "dog", "ass", "person"]) + profanity_filter.release(empty_cuda_cache=True if device != "cpu" else False) + + +def test_cpu_lm_profanity_filter(): + _check_profanity_filter("cpu") @pytest.mark.skipif( - torch.cuda.is_available(), + not torch.cuda.is_available(), reason="Test requires CUDA support", ) -def test_cuda_wordnet_synonym_generator(): - _check_synonym_generator("cuda", WordNetSynonymGenerator) +def test_cuda_lm_profanity_filter(): + _check_profanity_filter("cuda") diff --git a/tests/core_tests/unittests/test_utils.py b/tests/core_tests/unittests/test_utils.py new file mode 100644 index 0000000..bac169b --- /dev/null +++ b/tests/core_tests/unittests/test_utils.py @@ -0,0 +1,186 @@ +import json +import os +import shutil +import unittest + +import numpy as np +from PIL import Image + +from datadreamer.utils import ( + dataset_utils, + merge_raw_datasets, +) + + +def create_sample_image( + image_name, image_size=(100, 100), color=(255, 0, 0), save_dir="test_images" +): + """Create and save a simple image with a solid color. + + Args: + image_name (str): The name of the image file. + image_size (tuple): The size of the image (width, height). + color (tuple): The RGB color of the image. + save_dir (str): The directory to save the images. 
+ """ + # Create the directory if it doesn't exist + os.makedirs(save_dir, exist_ok=True) + + # Create a blank image with the given color + img = Image.new("RGB", image_size, color) + + # Save the image to the specified directory + img.save(os.path.join(save_dir, image_name)) + + +class TestSaveAnnotationsToJson(unittest.TestCase): + def setUp(self): + # Create a temporary directory for saving images and JSON file + self.test_dir = "test_dir" + self.image_dir = "test_images" + os.makedirs(self.test_dir, exist_ok=True) + os.makedirs(self.image_dir, exist_ok=True) + + # Create sample images + create_sample_image("image1.jpg", save_dir=self.image_dir) + create_sample_image("image2.jpg", save_dir=self.image_dir) + + self.file_name = "annotations.json" + self.image_paths = [ + os.path.join(self.image_dir, "image1.jpg"), + os.path.join(self.image_dir, "image2.jpg"), + ] + self.labels_list = [ + [0], # Labels for image1 + [1], # Labels for image2 + ] + self.labels_list = np.array(self.labels_list) + self.boxes_list = [ + [[10, 10, 50, 50]], # Bounding boxes for image1 + [[20, 20, 40, 40]], # Bounding boxes for image2 + ] + self.boxes_list = np.array(self.boxes_list) + self.class_names = ["class_1", "class_2"] + + def tearDown(self): + # Clean up the test directory after each test + for file in os.listdir(self.test_dir): + os.remove(os.path.join(self.test_dir, file)) + for file in os.listdir(self.image_dir): + os.remove(os.path.join(self.image_dir, file)) + os.rmdir(self.test_dir) + os.rmdir(self.image_dir) + + def test_save_annotations_to_json(self): + # Test saving annotations to JSON + dataset_utils.save_annotations_to_json( + self.image_paths, + self.labels_list, + boxes_list=self.boxes_list, + class_names=self.class_names, + save_dir=self.test_dir, + file_name=self.file_name, + ) + + # Load the saved JSON file and check contents + with open(os.path.join(self.test_dir, self.file_name), "r") as f: + annotations = json.load(f) + + # Check if annotations are correct + 
self.assertEqual(len(annotations), 3) # 2 images + class_names + self.assertIn("image1.jpg", annotations) + self.assertIn("image2.jpg", annotations) + self.assertEqual(annotations["image1.jpg"]["labels"], [0]) + self.assertEqual(annotations["image2.jpg"]["labels"], [1]) + self.assertEqual(annotations["class_names"], self.class_names) + + +class TestMergeDatasets(unittest.TestCase): + def setUp(self): + # Create temporary directories for test datasets + self.input_dir_1 = "input_dir_1" + self.input_dir_2 = "input_dir_2" + self.input_dir_3 = "input_dir_3" + self.output_dir = "output_dir" + os.makedirs(self.input_dir_1, exist_ok=True) + os.makedirs(self.input_dir_2, exist_ok=True) + os.makedirs(self.input_dir_3, exist_ok=True) + + # Create generation_args.json files + self.generation_args_1 = { + "task": "object_detection", + "class_names": ["class_1", "class_2"], + "seed": 1, + } + self.generation_args_2 = { + "task": "object_detection", + "class_names": ["class_1", "class_2"], + "seed": 2, + } + with open(os.path.join(self.input_dir_1, "generation_args.yaml"), "w") as f: + json.dump(self.generation_args_1, f) + with open(os.path.join(self.input_dir_2, "generation_args.yaml"), "w") as f: + json.dump(self.generation_args_2, f) + + # Create annotations.json files + self.annotations_1 = { + "image1.jpg": {"labels": [0]}, + "image2.jpg": {"labels": [1]}, + "class_names": ["class_1", "class_2"], + } + self.annotations_2 = { + "image3.jpg": {"labels": [0]}, + "image4.jpg": {"labels": [1]}, + "class_names": ["class_1", "class_2"], + } + with open(os.path.join(self.input_dir_1, "annotations.json"), "w") as f: + json.dump(self.annotations_1, f) + with open(os.path.join(self.input_dir_2, "annotations.json"), "w") as f: + json.dump(self.annotations_2, f) + + # Create image files + with open(os.path.join(self.input_dir_1, "image1.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_1, "image2.jpg"), "wb") as f: + 
f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_2, "image3.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + with open(os.path.join(self.input_dir_2, "image4.jpg"), "wb") as f: + f.write(os.urandom(1024)) # Dummy image content + + def tearDown(self): + # Clean up the test directories after each test + for input_dir in (self.input_dir_1, self.input_dir_2, self.input_dir_3): + shutil.rmtree(input_dir) + if os.path.exists(self.output_dir): + shutil.rmtree(self.output_dir) + + def test_merge_datasets(self): + # Test merging datasets + merge_raw_datasets.merge_datasets( + [self.input_dir_1, self.input_dir_2], self.output_dir, copy_files=True + ) + + # Check if output directory is created + self.assertTrue(os.path.exists(self.output_dir)) + + # Check if annotations.json is merged correctly + with open(os.path.join(self.output_dir, "annotations.json"), "r") as f: + merged_annotations = json.load(f) + + print(merged_annotations) + + self.assertEqual(len(merged_annotations), 5) # 4 images in total + class_names + self.assertIn("image1.jpg", merged_annotations) + self.assertIn("image2.jpg", merged_annotations) + self.assertIn("image3.jpg", merged_annotations) + self.assertIn("image4.jpg", merged_annotations) + self.assertEqual(merged_annotations["class_names"], ["class_1", "class_2"]) + + # Check if images are copied correctly + for image_name in ["image1.jpg", "image2.jpg", "image3.jpg", "image4.jpg"]: + self.assertTrue(os.path.exists(os.path.join(self.output_dir, image_name))) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/integration/test_pipeline.py b/tests/heavy_tests/integration/test_pipeline_heavy.py similarity index 56% rename from tests/integration/test_pipeline.py rename to tests/heavy_tests/integration/test_pipeline_heavy.py index 293d3a7..6b7dc3f 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/heavy_tests/integration/test_pipeline_heavy.py @@ -8,7 +8,7 @@ import torch # Get the total memory in GB
-total_memory = psutil.virtual_memory().total / (1024**3) +total_memory = psutil.virtual_memory().total / (1024**3) # Get the total disk space in GB total_disk_space = psutil.disk_usage("/").total / (1024**3) @@ -44,214 +44,6 @@ def _check_detection_pipeline(cmd: str, target_folder: str): ), "bboxes_visualization directory not created" -def _check_wrong_argument_choice(cmd: str): - with pytest.raises(subprocess.CalledProcessError): - subprocess.check_call(cmd, shell=True) - - -def _check_wrong_value(cmd: str): - with pytest.raises(ValueError): - try: - subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) - except subprocess.CalledProcessError as e: - raise ValueError(e.output.decode()) from e - - -# ========================================================= -# ARGUMENTS CHECKS -# ========================================================= -def test_invalid_task_value(): - # Define the cmd - cmd = "datadreamer --task invalid_task" - _check_wrong_argument_choice(cmd) - - -def test_invalid_prompts_number_type(): - # Define the cmd - cmd = "datadreamer --prompts_number value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_num_objects_range_type(): - # Define the cmd - cmd = "datadreamer --num_objects_range value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_conf_threshold_range_type(): - # Define the cmd - cmd = "datadreamer --conf_threshold value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_tester_patience_type(): - # Define the cmd - cmd = "datadreamer --image_tester_patience value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_seed_type(): - # Define the cmd - cmd = "datadreamer --seed value --device cpu" - _check_wrong_argument_choice(cmd) - - -def test_invalid_prompt_generator(): - # Define the cmd - cmd = "datadreamer --prompt_generator invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_generator(): - # Define the cmd - cmd = "datadreamer --image_generator
invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_det_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator clip" - _check_wrong_argument_choice(cmd) - - -def test_invalid_clf_image_annotator(): - # Define the cmd - cmd = "datadreamer --image_annotator owlv2 --task classification" - _check_wrong_argument_choice(cmd) - - -def test_invalid_device(): - # Define the cmd - cmd = "datadreamer --device invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_invalid_annotator_size(): - # Define the cmd - cmd = "datadreamer --annotator_size invalide_value" - _check_wrong_argument_choice(cmd) - - -def test_empty_class_names(): - # Define the cmd - cmd = "datadreamer --class_names []" - _check_wrong_value(cmd) - - -def test_invalid_class_names(): - # Define the cmd - cmd = "datadreamer --class_names [2, -1]" - _check_wrong_value(cmd) - - -def test_invalid_prompts_number(): - # Define the cmd - cmd = "datadreamer --prompts_number -1" - _check_wrong_value(cmd) - - -def test_negative_conf_threshold(): - # Define the cmd - cmd = "datadreamer --conf_threshold -1" - _check_wrong_value(cmd) - - -def test_big_conf_threshold(): - # Define the cmd - cmd = "datadreamer --conf_threshold 10" - _check_wrong_value(cmd) - - -def test_negative_annotation_iou_threshold(): - # Define the cmd - cmd = "datadreamer --annotation_iou_threshold -1" - _check_wrong_value(cmd) - - -def test_big_annotation_iou_threshold(): - # Define the cmd - cmd = "datadreamer --annotation_iou_threshold 10" - _check_wrong_value(cmd) - - -def test_invalid_image_tester_patience(): - # Define the cmd - cmd = "datadreamer --image_tester_patience -1" - _check_wrong_value(cmd) - - -def test_invalid_seed(): - # Define the cmd - cmd = "datadreamer --seed -1 --device cpu" - _check_wrong_value(cmd) - - -def 
test_invalid_synonym_generator(): - # Define the cmd - cmd = "datadreamer --device cpu --synonym_generator invalid" - _check_wrong_value(cmd) - - -def test_invalid_lm_quantization(): - # Define the cmd - cmd = "datadreamer --device cude --lm_quantization invalid" - _check_wrong_value(cmd) - - -def test_invalid_device_lm_quantization(): - # Define the cmd - cmd = "datadreamer --device cpu --lm_quantization 4bit" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_prompt(): - # Define the cmd - cmd = "datadreamer --batch_size_prompt -1" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_annotation(): - # Define the cmd - cmd = "datadreamer --batch_size_annotation -1" - _check_wrong_value(cmd) - - -def test_invalid_batch_size_image(): - # Define the cmd - cmd = "datadreamer --batch_size_image -1" - _check_wrong_value(cmd) - - -def test_invalid_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 1" - _check_wrong_value(cmd) - - -def test_many_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 1 2 3" - _check_wrong_value(cmd) - - -def test_desc_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range 3 1" - _check_wrong_value(cmd) - - -def test_negative_num_objects_range(): - # Define the cmd - cmd = "datadreamer --num_objects_range -3 1" - _check_wrong_value(cmd) - - # ========================================================= # DETECTION - SIMPLE LM # ========================================================= @@ -265,7 +57,7 @@ def test_cpu_simple_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -287,7 +79,7 @@ def test_cuda_simple_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir 
{target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -309,7 +101,7 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -332,7 +124,7 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -355,7 +147,7 @@ def test_cpu_simple_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -377,7 +169,7 @@ def test_cuda_simple_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -389,50 +181,6 @@ def test_cuda_simple_sdxl_detection_pipeline(): _check_detection_pipeline(cmd, target_folder) -@pytest.mark.skipif( - total_memory < 16 or total_disk_space < 35, - reason="Test requires at least 16GB of RAM and 35GB of HDD", -) -def test_cpu_simple_sdxl_lightning_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cpu-simple-sdxl-lightning/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " - 
f"--prompts_number 1 " - f"--prompt_generator simple " - f"--num_objects_range 1 2 " - f"--image_generator sdxl-lightning " - f"--use_image_tester " - f"--device cpu" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, - reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", -) -def test_cuda_simple_sdxl_lightning_detection_pipeline(): - # Define target folder - target_folder = "data/data-det-cuda-simple-sdxl-lightning/" - # Define the command to run the datadreamer - cmd = ( - f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " - f"--prompts_number 1 " - f"--prompt_generator simple " - f"--num_objects_range 1 2 " - f"--image_generator sdxl-lightning " - f"--use_image_tester " - f"--device cuda" - ) - # Check the run of the pipeline - _check_detection_pipeline(cmd, target_folder) - - # ========================================================= # DETECTION - LLM # ========================================================= @@ -446,7 +194,7 @@ def test_cpu_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -468,7 +216,7 @@ def test_cuda_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -490,7 +238,7 @@ def test_cuda_4bit_lm_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " 
f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -513,7 +261,7 @@ def test_cpu_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -535,7 +283,7 @@ def test_cuda_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -557,7 +305,7 @@ def test_cuda_4bit_lm_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -583,7 +331,7 @@ def test_cpu_tiny_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -605,7 +353,7 @@ def test_cuda_tiny_sdxl_turbo_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -627,7 +375,7 @@ def test_cpu_tiny_sdxl_detection_pipeline(): # Define the command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -649,7 +397,7 @@ def test_cuda_tiny_sdxl_detection_pipeline(): # Define the 
command to run the datadreamer cmd = ( f"datadreamer --save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -662,7 +410,98 @@ def test_cuda_tiny_sdxl_detection_pipeline(): # ========================================================= -# CLASSIFICATION - SIMPLE LM +# DETECTION - Qwen2.5 LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_qwen2_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_turbo_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def 
test_cpu_qwen2_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cpu-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_detection_pipeline(): + # Define target folder + target_folder = "data/data-det-cuda-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# CLASSIFICATION - SIMPLE LM # ========================================================= @pytest.mark.skipif( total_memory < 16 or total_disk_space < 35, @@ -675,7 +514,7 @@ def test_cpu_simple_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -699,7 +538,7 @@ def test_cuda_simple_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple "
f"--num_objects_range 1 2 " @@ -723,7 +562,7 @@ def test_cuda_simple_llm_synonym_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -748,7 +587,7 @@ def test_cuda_simple_wordnet_synonym_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--num_objects_range 1 2 " @@ -773,7 +612,7 @@ def test_cpu_simple_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--image_annotator clip " @@ -797,7 +636,7 @@ def test_cuda_simple_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator simple " f"--image_annotator clip " @@ -811,7 +650,7 @@ def test_cuda_simple_sdxl_classification_pipeline(): # ========================================================= -# CLASSIFICATION - LLM +# CLASSIFICATION - LLM # ========================================================= @pytest.mark.skipif( total_memory < 32 or total_disk_space < 55, @@ -824,7 +663,7 @@ def test_cpu_lm_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -848,7 +687,7 @@ def test_cuda_lm_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification
" f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -872,7 +711,7 @@ def test_cuda_4bit_lm_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -897,7 +736,7 @@ def test_cpu_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--image_annotator clip " @@ -921,7 +760,7 @@ def test_cuda_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--image_annotator clip " @@ -945,7 +784,7 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator lm " f"--num_objects_range 1 2 " @@ -960,7 +799,7 @@ def test_cuda_4bit_lm_sdxl_classification_pipeline(): # ========================================================= -# CLASSIFICATION - TinyLlama LLM +# INSTANCE SEGMENTATION - TinyLlama LLM # ========================================================= @pytest.mark.skipif( total_memory < 16 or total_disk_space < 35, @@ -973,7 +812,7 @@ def test_cpu_tiny_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--image_annotator clip " 
@@ -997,7 +836,7 @@ def test_cuda_tiny_sdxl_turbo_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -1021,7 +860,7 @@ def test_cpu_tiny_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -1045,7 +884,7 @@ def test_cuda_tiny_sdxl_classification_pipeline(): cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " - f"--class_names alien mars cat " + f"--class_names alien bear cat " f"--prompts_number 1 " f"--prompt_generator tiny " f"--num_objects_range 1 2 " @@ -1059,20 +898,26 @@ def test_cuda_tiny_sdxl_classification_pipeline(): # ========================================================= -# TEST WITH CONFIG FILE +# CLASSIFICATION - Qwen2.5 LLM # ========================================================= @pytest.mark.skipif( total_memory < 16 or total_disk_space < 35, reason="Test requires at least 16GB of RAM and 35GB of HDD", ) -def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): +def test_cpu_qwen2_sdxl_turbo_classification_pipeline(): # Define target folder - target_folder = "data/data-det-cpu-simple-sdxl-turbo-config/" + target_folder = "data/data-cls-cpu-qwen2-sdxl-turbo/" # Define the command to run the datadreamer cmd = ( - f"datadreamer --save_dir {target_folder} " + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--image_annotator clip " f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " + f"--image_generator sdxl-turbo " + f"--use_image_tester " f"--device cpu" ) # Check the run of the 
pipeline @@ -1083,14 +928,20 @@ def test_cpu_simple_sdxl_turbo_config_detection_pipeline(): not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", ) -def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): +def test_cuda_qwen2_sdxl_turbo_classification_pipeline(): # Define target folder - target_folder = "data/data-det-cuda-simple-sdxl-turbo-config/" + target_folder = "data/data-cls-cuda-qwen2-sdxl-turbo/" # Define the command to run the datadreamer cmd = ( - f"datadreamer --save_dir {target_folder} " + f"datadreamer --task classification " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " - f"--config ./sample_config.yaml " + f"--image_annotator clip " + f"--image_generator sdxl-turbo " + f"--use_image_tester " f"--device cuda" ) # Check the run of the pipeline @@ -1101,16 +952,20 @@ def test_cuda_simple_sdxl_turbo_config_detection_pipeline(): total_memory < 16 or total_disk_space < 35, reason="Test requires at least 16GB of RAM and 35GB of HDD", ) -def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): +def test_cpu_qwen2_sdxl_classification_pipeline(): # Define target folder - target_folder = "data/data-cls-cpu-simple-sdxl-turbo-config/" + target_folder = "data/data-cls-cpu-qwen2-sdxl/" # Define the command to run the datadreamer cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " f"--image_annotator clip " - f"--config ./sample_config.yaml " + f"--image_generator sdxl " + f"--use_image_tester " f"--device cpu" ) # Check the run of the pipeline @@ -1121,16 +976,516 @@ def test_cpu_simple_sdxl_turbo_config_classification_pipeline(): not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, reason="Test 
requires GPU, at least 16GB of RAM and 35GB of HDD", ) -def test_cuda_simple_sdxl_turbo_config_classification_pipeline(): +def test_cuda_qwen2_sdxl_classification_pipeline(): # Define target folder - target_folder = "data/data-cls-cuda-simple-sdxl-turbo-config/" + target_folder = "data/data-cls-cuda-qwen2-sdxl/" # Define the command to run the datadreamer cmd = ( f"datadreamer --task classification " f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " f"--num_objects_range 1 2 " f"--image_annotator clip " - f"--config ./sample_config.yaml " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - SIMPLE LM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = 
"data/data-inst-seg-cuda-simple-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 55, + reason="Test requires GPU, at least 16GB of RAM and 55GB of HDD", +) +def test_cuda_simple_llm_synonym_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-llm-synonym-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--image_annotator owlv2-slimsam " + f"--use_image_tester " + f"--synonym_generator llm " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_wordnet_synonym_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-wordnet-synonym-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--num_objects_range 1 2 " + f"--image_annotator 
owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--synonym_generator wordnet " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_simple_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_simple_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-simple-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator simple " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 
55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, 55GB of HDD and CUDA support", +) +def test_cuda_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 14 or not torch.cuda.is_available() or total_disk_space < 45, + reason="Test requires at least 14GB of RAM, 45GB of HDD and CUDA support", +) +def test_cuda_4bit_lm_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-4bit-lm-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + 
f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--lm_quantization 4bit " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 55, + reason="Test requires at least 32GB of RAM and 55GB of HDD for running on CPU", +) +def test_cpu_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 55, + reason="Test requires at least 16GB of RAM, CUDA support and 55GB of HDD", +) +def test_cuda_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 14 or not torch.cuda.is_available() or total_disk_space < 45, + reason="Test requires at least 14GB of RAM, CUDA support and 45GB of HDD", +) +def 
test_cuda_4bit_lm_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-4bit-lm-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator lm " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--lm_quantization 4bit " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - TinyLlama LLM +# ========================================================= +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_tiny_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-tiny-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_tiny_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-tiny-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + 
f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_tiny_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-tiny-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_tiny_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-tiny-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator tiny " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +# ========================================================= +# INSTANCE SEGMENTATION - Qwen2.5 LLM +# ========================================================= 
+@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_qwen2_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--image_annotator owlv2-slimsam " + f"--num_objects_range 1 2 " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_turbo_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-qwen2-sdxl-turbo/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl-turbo " + f"--use_image_tester " + f"--device cuda" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 35, + reason="Test requires at least 16GB of RAM and 35GB of HDD", +) +def test_cpu_qwen2_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cpu-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + 
f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " + f"--device cpu" + ) + # Check the run of the pipeline + _check_detection_pipeline(cmd, target_folder) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 35, + reason="Test requires GPU, at least 16GB of RAM and 35GB of HDD", +) +def test_cuda_qwen2_sdxl_instance_segmentation_pipeline(): + # Define target folder + target_folder = "data/data-inst-seg-cuda-qwen2-sdxl/" + # Define the command to run the datadreamer + cmd = ( + f"datadreamer --task instance-segmentation " + f"--save_dir {target_folder} " + f"--class_names alien bear cat " + f"--prompts_number 1 " + f"--prompt_generator qwen2 " + f"--num_objects_range 1 2 " + f"--image_annotator owlv2-slimsam " + f"--image_generator sdxl " + f"--use_image_tester " f"--device cuda" ) # Check the run of the pipeline diff --git a/tests/heavy_tests/unittests/test_image_generation_heavy.py b/tests/heavy_tests/unittests/test_image_generation_heavy.py new file mode 100644 index 0000000..49721f7 --- /dev/null +++ b/tests/heavy_tests/unittests/test_image_generation_heavy.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from typing import Type, Union + +import psutil +import pytest +import torch +from PIL import Image + +from datadreamer.image_generation import ( + StableDiffusionImageGenerator, + StableDiffusionLightningImageGenerator, + StableDiffusionTurboImageGenerator, +) + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_image_generator( + image_generator_class: Type[ + Union[ + StableDiffusionImageGenerator, + StableDiffusionTurboImageGenerator, + StableDiffusionLightningImageGenerator, + ] + ], + device: str, +): + image_generator = 
image_generator_class(device=device) + # Check that the image generator is not None + assert image_generator is not None + # Generate images and check each of them + for generated_images_batch in image_generator.generate_images( + ["A photo of a cat, dog"], [["cat", "dog"]] + ): + generated_image = generated_images_batch[0] + assert generated_image is not None + assert isinstance(generated_image, Image.Image) + + images = image_generator.generate_images_batch( + ["A photo of a cat, dog"], + "blurry, bad quality", + ) + assert len(images) == 1 + assert images[0] is not None + assert isinstance(images[0], Image.Image) + + # Release the generator + image_generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", +) +def test_cuda_sdxl_image_generator(): + _check_image_generator(StableDiffusionImageGenerator, "cuda") + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", +) +def test_cpu_sdxl_image_generator(): + _check_image_generator(StableDiffusionImageGenerator, "cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_memory < 16 or total_disk_space < 25, + reason="Test requires GPU, at least 16GB of RAM and 25GB of HDD", +) +def test_cuda_sdxl_lightning_image_generator(): + _check_image_generator(StableDiffusionLightningImageGenerator, "cuda") + + +@pytest.mark.skipif( + total_memory < 16 or total_disk_space < 25, + reason="Test requires at least 16GB of RAM and 25GB of HDD", +) +def test_cpu_sdxl_lightning_image_generator(): + _check_image_generator(StableDiffusionLightningImageGenerator, "cpu") diff --git a/tests/heavy_tests/unittests/test_prompt_generation_heavy.py b/tests/heavy_tests/unittests/test_prompt_generation_heavy.py new file mode 100644 index 0000000..a943f5c --- /dev/null 
+++ b/tests/heavy_tests/unittests/test_prompt_generation_heavy.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import psutil +import pytest +import torch + +from datadreamer.prompt_generation.lm_prompt_generator import LMPromptGenerator +from datadreamer.prompt_generation.lm_synonym_generator import LMSynonymGenerator + +# Get the total memory in GB +total_memory = psutil.virtual_memory().total / (1024**3) +# Get the total disk space in GB +total_disk_space = psutil.disk_usage("/").total / (1024**3) + + +def _check_lm_prompt_generator( + device: str, prompt_generator_class=LMPromptGenerator, quantization: str = "none" +): + object_names = ["aeroplane", "bicycle", "bird", "boat"] + prompt_generator = prompt_generator_class( + class_names=object_names, + prompts_number=2, + device=device, + quantization=quantization, + ) + prompts = prompt_generator.generate_prompts() + # Check that the some prompts were generated + assert len(prompts) > 0 + # Iterate through the prompts + for selected_objects, prompt_text in prompts: + # Selected objects aren't empty + assert len(selected_objects) > 0 + # The slected objects are in the range + assert ( + prompt_generator.num_objects_range[0] + <= len(selected_objects) + <= prompt_generator.num_objects_range[1] + ) + # Check the generated text + assert len(prompt_text) > 0 and prompt_text.lower().startswith("a photo of") + prompt_generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", +) +def test_cuda_lm_prompt_generator(): + _check_lm_prompt_generator("cuda") + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 35, + reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", +) +def test_cpu_lm_prompt_generator(): + _check_lm_prompt_generator("cpu") + + +def _check_synonym_generator(device: 
str, synonym_generator_class=LMSynonymGenerator): + synonyms_num = 3 + generator = synonym_generator_class(synonyms_number=synonyms_num, device=device) + synonyms = generator.generate_synonyms_for_list(["astronaut", "cat", "dog"]) + # Check that the some synonyms were generated + assert len(synonyms) > 0 + # Iterate through the synonyms + for word, synonym_list in synonyms.items(): + # Check that the word is not empty + assert len(word) > 0 + # Check that the synonym list is not empty + assert len(synonym_list) > 0 + # Check that the synonyms are not empty + for synonym in synonym_list: + assert len(synonym) > 0 + generator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + total_memory < 16 or not torch.cuda.is_available() or total_disk_space < 35, + reason="Test requires at least 16GB of RAM, 35GB of HDD and CUDA support", +) +def test_cuda_synonym_generator(): + _check_synonym_generator("cuda") + + +@pytest.mark.skipif( + total_memory < 32 or total_disk_space < 35, + reason="Test requires at least 28GB of RAM and 35GB of HDD for running on CPU", +) +def test_cpu_synonym_generator(): + _check_synonym_generator("cpu") diff --git a/tests/unittests/test_annotators.py b/tests/unittests/test_annotators.py deleted file mode 100644 index 698ed3d..0000000 --- a/tests/unittests/test_annotators.py +++ /dev/null @@ -1,98 +0,0 @@ -from __future__ import annotations - -import numpy as np -import psutil -import pytest -import requests -import torch -from PIL import Image - -from datadreamer.dataset_annotation.clip_annotator import CLIPAnnotator -from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator - -# Get the total disk space in GB -total_disk_space = psutil.disk_usage("/").total / (1024**3) - - -def _check_owlv2_annotator(device: str, size: str = "base"): - url = "https://ultralytics.com/images/bus.jpg" - im = Image.open(requests.get(url, stream=True).raw) - annotator = OWLv2Annotator(device=device, size=size) - 
final_boxes, final_scores, final_labels = annotator.annotate_batch( - [im], ["bus", "people"] - ) - # Assert that the boxes, scores and labels are tensors - assert isinstance(final_boxes, list) and len(final_boxes) == 1 - assert isinstance(final_scores, list) and len(final_scores) == 1 - assert isinstance(final_labels, list) and len(final_labels) == 1 - # Get the number of objects detected - num_objects = final_boxes[0].shape[0] - # Check that the boxes has correct shape - assert final_boxes[0].shape == (num_objects, 4) - # Check that the scores has correct shape - assert final_scores[0].shape == (num_objects,) - # Check that the labels has correct shape - assert final_labels[0].shape == (num_objects,) - # Check that the scores are not zero - assert np.all(final_scores[0] > 0) - # Check that the labels are bigger or equal to zero - assert np.all(final_labels[0] >= 0) - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", -) -def test_cuda_owlv2_annotator(): - _check_owlv2_annotator("cuda") - - -@pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", -) -def test_cpu_owlv2_annotator(): - _check_owlv2_annotator("cpu") - - -def _check_clip_annotator(device: str, size: str = "base"): - url = "https://ultralytics.com/images/bus.jpg" - im = Image.open(requests.get(url, stream=True).raw) - annotator = CLIPAnnotator(device=device, size=size) - labels = annotator.annotate_batch([im], ["bus", "people"]) - # Check that the labels are lists - assert isinstance(labels, list) and len(labels) == 1 - # Check that the labels are ndarray of integers - assert isinstance(labels[0], np.ndarray) and labels[0].dtype == np.int64 - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", -) -def test_cuda_clip_base_annotator(): - _check_clip_annotator("cuda") - - -@pytest.mark.skipif( - total_disk_space < 15, 
- reason="Test requires at least 15GB of HDD", -) -def test_cpu_clip_base_annotator(): - _check_clip_annotator("cpu") - - -@pytest.mark.skipif( - not torch.cuda.is_available() or total_disk_space < 15, - reason="Test requires GPU and 15GB of HDD", -) -def test_cuda_clip_large_annotator(): - _check_clip_annotator("cuda") - - -@pytest.mark.skipif( - total_disk_space < 15, - reason="Test requires at least 15GB of HDD", -) -def test_cpu_clip_large_annotator(): - _check_clip_annotator("cpu")