From e22e5e8fe003f604ed8c684441c5bffb09bea530 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Sun, 28 Jan 2024 17:43:27 -0800 Subject: [PATCH 01/12] fixed formatting --- src/cleanvision/__init__.py | 2 +- src/cleanvision/imagelab.py | 1 + .../issue_managers/image_property.py | 81 ++++++++++++------- .../image_property_issue_manager.py | 25 +++--- 4 files changed, 65 insertions(+), 44 deletions(-) diff --git a/src/cleanvision/__init__.py b/src/cleanvision/__init__.py index 5447a0f7..3fea630f 100644 --- a/src/cleanvision/__init__.py +++ b/src/cleanvision/__init__.py @@ -12,7 +12,7 @@ def get_version() -> str: else: import importlib_metadata - return importlib_metadata.version("cleanvision") # type:ignore + return importlib_metadata.version("cleanvision") try: diff --git a/src/cleanvision/imagelab.py b/src/cleanvision/imagelab.py index a0ed5842..17fd36b4 100644 --- a/src/cleanvision/imagelab.py +++ b/src/cleanvision/imagelab.py @@ -3,6 +3,7 @@ The methods in this module should suffice for most use-cases, but advanced users can get extra flexibility via the code in other CleanVision modules. """ + from __future__ import annotations import random diff --git a/src/cleanvision/issue_managers/image_property.py b/src/cleanvision/issue_managers/image_property.py index 5b0b59c0..898dec11 100644 --- a/src/cleanvision/issue_managers/image_property.py +++ b/src/cleanvision/issue_managers/image_property.py @@ -1,10 +1,10 @@ import math from abc import ABC, abstractmethod -from typing import List, Dict, Any, Union, overload +from typing import Any, Dict, List, Optional, Union, overload import numpy as np import pandas as pd -from PIL import ImageStat, ImageFilter +from PIL import ImageFilter, ImageStat from PIL.Image import Image from cleanvision.issue_managers import IssueType @@ -48,12 +48,16 @@ def get_scores( return def mark_issue( - self, scores: pd.DataFrame, threshold: float, issue_type: str + self, + scores: pd.DataFrame, + issue_type: str, + threshold: Optional[float] = None, ) -> pd.DataFrame: is_issue = pd.DataFrame(index=scores.index) - is_issue[get_is_issue_colname(issue_type)] = ( - scores[get_score_colname(issue_type)] < threshold - ) + is_issue_colname, score_colname = get_is_issue_colname( + issue_type + ), get_score_colname(issue_type) + is_issue[is_issue_colname] = scores[score_colname] < threshold return is_issue @@ -127,9 +131,11 @@ def score_columns(self) -> List[str]: def __init__(self, issue_type: str) -> None: self.issue_type = issue_type self._score_columns = [ - "brightness_perc_99" - if self.issue_type == IssueType.DARK.value - else "brightness_perc_5" + ( + "brightness_perc_99" + if self.issue_type == IssueType.DARK.value + else "brightness_perc_5" + ) ] def calculate(self, image: Image) -> Dict[str, Union[float, str]]: @@ -294,8 +300,8 @@ def calc_color_space(image: Image) -> str: def calc_image_area_sqrt(image: Image) -> float: - size = image.size - return math.sqrt(size[0] * size[1]) + w, h = image.size + return math.sqrt(w) * math.sqrt(h) class ColorSpaceProperty(ImageProperty): @@ -326,12 +332,14 @@ def get_scores( return scores def mark_issue( - self, scores: pd.DataFrame, threshold: float, issue_type: str + self, scores: pd.DataFrame, issue_type: str, threshold: Optional[float] = None ) -> pd.DataFrame: is_issue = pd.DataFrame(index=scores.index) - is_issue[get_is_issue_colname(issue_type)] = ( - 1 - scores[get_score_colname(issue_type)] - ).astype("bool") + is_issue_colname, score_colname = get_is_issue_colname( + issue_type + ), get_score_colname(issue_type) + + is_issue[is_issue_colname] = (1 - scores[score_colname]).astype("bool") return is_issue @@ -344,6 +352,7 @@ def score_columns(self) -> List[str]: def __init__(self) -> None: self._score_columns = [self.name] + self.threshold = 0.5 # todo: this ensures that the scores are evenly distributed across the range def calculate(self, image: Image) -> Dict[str, Union[float, str]]: return {self.name: calc_image_area_sqrt(image)} @@ -352,35 +361,45 @@ def get_scores( self, raw_scores: pd.DataFrame, issue_type: str, + iqr_factor: float = 3.0, **kwargs: Any, ) -> pd.DataFrame: super().get_scores(raw_scores, issue_type, **kwargs) assert raw_scores is not None - image_size_scores = raw_scores[self.score_columns[0]] - median_image_size = image_size_scores.median() - size_ratios = image_size_scores / median_image_size - - # Computing the values of the two divisions - size_division_1 = size_ratios - size_division_2 = 1.0 / size_ratios + size = raw_scores[self.name] + q1, q3 = np.percentile(size, [25, 75]) + size_iqr = q3 - q1 + min_threshold, max_threshold = ( + q1 - iqr_factor * size_iqr, + q3 + iqr_factor * size_iqr, + ) + mid_threshold = (min_threshold + max_threshold) / 2 + threshold_distance = (max_threshold - min_threshold) / 2 + distance = np.absolute(size - mid_threshold) - # Using np.minimum to determine the element-wise minimum value between the two divisions - size_scores = np.minimum(size_division_1, size_division_2) + if threshold_distance > 0: + norm_dist = (distance * self.threshold) / threshold_distance + score_values = 1 - np.clip(norm_dist, 0, 1) + else: + norm_value = np.min(distance) / 0.5 + norm_dist = distance / norm_value + score_values = 1 - np.clip(norm_dist, 0, 1) scores = pd.DataFrame(index=raw_scores.index) - scores[get_score_colname(issue_type)] = size_scores + scores[get_score_colname(issue_type)] = score_values return scores def mark_issue( - self, scores: pd.DataFrame, threshold: float, issue_type: str + self, scores: pd.DataFrame, issue_type: str, threshold: Optional[float] = None ) -> pd.DataFrame: + threshold = self.threshold if threshold is None else threshold + is_issue_colname, score_colname = get_is_issue_colname( + issue_type + ), get_score_colname(issue_type) + is_issue = pd.DataFrame(index=scores.index) - is_issue[get_is_issue_colname(issue_type)] = np.where( - scores[get_score_colname(issue_type)] < 1.0 / threshold, - True, - False, - ) + is_issue[is_issue_colname] = scores[score_colname] < threshold return is_issue diff --git a/src/cleanvision/issue_managers/image_property_issue_manager.py b/src/cleanvision/issue_managers/image_property_issue_manager.py index da2a85a1..ad6be8be 100644 --- a/src/cleanvision/issue_managers/image_property_issue_manager.py +++ b/src/cleanvision/issue_managers/image_property_issue_manager.py @@ -1,30 +1,27 @@ import multiprocessing -from typing import Dict, Any, List, Set, Optional, Union +from typing import Any, Dict, List, Optional, Set, Union import pandas as pd from tqdm.auto import tqdm from cleanvision.dataset.base_dataset import Dataset -from cleanvision.issue_managers import register_issue_manager, IssueType +from cleanvision.issue_managers import IssueType, register_issue_manager from cleanvision.issue_managers.image_property import ( - BrightnessProperty, AspectRatioProperty, - EntropyProperty, BlurrinessProperty, + BrightnessProperty, ColorSpaceProperty, + EntropyProperty, ImageProperty, SizeProperty, ) from cleanvision.utils.base_issue_manager import IssueManager from cleanvision.utils.constants import ( IMAGE_PROPERTY, - MAX_PROCS, IMAGE_PROPERTY_ISSUE_TYPES_LIST, + MAX_PROCS, ) -from cleanvision.utils.utils import ( - get_is_issue_colname, - update_df, -) +from cleanvision.utils.utils import get_is_issue_colname, update_df def compute_scores( @@ -72,7 +69,7 @@ def get_default_params(self) -> Dict[str, Any]: "color_threshold": 0.18, }, IssueType.GRAYSCALE.value: {}, - IssueType.ODD_SIZE.value: {"threshold": 10.0}, + IssueType.ODD_SIZE.value: {"iqr_factor": 3.0}, } def update_params(self, params: Dict[str, Any]) -> None: @@ -203,11 +200,15 @@ def update_issues( score_columns = agg_computations[score_column_names] issue_scores = self.image_properties[issue_type].get_scores( - score_columns, issue_type, **self.params[issue_type] + raw_scores=score_columns, + issue_type=issue_type, + **self.params[issue_type], ) is_issue = self.image_properties[issue_type].mark_issue( - issue_scores, self.params[issue_type].get("threshold"), issue_type + scores=issue_scores, + issue_type=issue_type, + threshold=self.params[issue_type].get("threshold"), ) self.issues = self.issues.join(issue_scores) self.issues = self.issues.join(is_issue) From 5314972093a47cf779829eda4f8efa74512948cb Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Sun, 28 Jan 2024 23:45:00 -0800 Subject: [PATCH 02/12] updated flake8 check --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e0c1e662..04315435 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,7 @@ jobs: - name: Install flake8 run: pip install flake8 - name: Lint with flake8 - run: flake8 --ignore=E203,E501,E722,E401,W503 src tests --count --show-source --statistics + run: flake8 --ignore=E203,E501,E722,E401,W503,E704 src tests --count --show-source --statistics nblint: name: Lint Notebooks runs-on: ubuntu-latest From 2e4831938348899a2cb324de42943c89c061db33 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 15:22:38 -0800 Subject: [PATCH 03/12] Fixed test --- tests/test_image_property_helpers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_image_property_helpers.py b/tests/test_image_property_helpers.py index 633cb01f..60da6a13 100644 --- a/tests/test_image_property_helpers.py +++ b/tests/test_image_property_helpers.py @@ -2,18 +2,18 @@ import pandas as pd import pytest from PIL import Image +from pytest import approx import cleanvision -import math from cleanvision.issue_managers import IssueType from cleanvision.issue_managers.image_property import ( BrightnessProperty, - calculate_brightness, - get_image_mode, calc_aspect_ratio, + calc_blurriness, calc_entropy, calc_image_area_sqrt, - calc_blurriness, + calculate_brightness, + get_image_mode, ) from cleanvision.utils.utils import get_is_issue_colname, get_score_colname @@ -54,8 +54,8 @@ def test_calc_bluriness(): def test_calc_area(): img = Image.new("RGB", (200, 200), (255, 0, 0)) - area = calc_image_area_sqrt(img) # img.size[0] * img.size[1] - assert area == math.sqrt(200 * 200) + area = calc_image_area_sqrt(img) + assert area == approx(200) @pytest.mark.parametrize( From b563265be7e9501d9a18581f11f5368d4192acf6 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 15:30:28 -0800 Subject: [PATCH 04/12] Fixed test --- tests/test_image_property_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_image_property_helpers.py b/tests/test_image_property_helpers.py index 60da6a13..c6bf9fe2 100644 --- a/tests/test_image_property_helpers.py +++ b/tests/test_image_property_helpers.py @@ -137,5 +137,5 @@ def test_get_scores(self, image_property, issue_type, expected_output): ], ) def test_mark_issue(self, image_property, scores, threshold, expected_mark): - mark = image_property.mark_issue(scores, threshold, "fake_issue") + mark = image_property.mark_issue(scores, "fake_issue", threshold) assert all(mark == expected_mark) From ec1fd5723b9efcebcb3a8d25b96b3a26e1aeacfd Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 15:54:52 -0800 Subject: [PATCH 05/12] Removed unncessary dependencies in dev req file --- .github/workflows/ci.yml | 3 ++- requirements-dev.txt | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 138e71a9..5ae6bb2e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,7 +29,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest pytest-cov psutil -e ".[all]" + pip install -e ".[all]" + pip install -r requirements-dev.txt shell: bash - name: Test with coverage run: pytest --verbose --cov=src/cleanvision/ --cov-config .coveragerc --cov-report=xml tests/ diff --git a/requirements-dev.txt b/requirements-dev.txt index 24cc38b3..40d9db0c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,9 +4,6 @@ mypy pre-commit pytest pytest-cov -pytest-lazy-fixture -datasets>=2.7.0 -torchvision>=0.12.0 black build flake8 From 1cd9f1a9d975431fc2a0440ed4aab0eff468c9b1 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 17:56:22 -0800 Subject: [PATCH 06/12] Fixed odd size tests --- .../issue_managers/image_property.py | 18 ++++++++----- tests/test_run.py | 27 ++++++++++--------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/cleanvision/issue_managers/image_property.py b/src/cleanvision/issue_managers/image_property.py index 3c970676..c59aa341 100644 --- a/src/cleanvision/issue_managers/image_property.py +++ b/src/cleanvision/issue_managers/image_property.py @@ -373,16 +373,20 @@ def get_scores( q3 + iqr_factor * size_iqr, ) mid_threshold = (min_threshold + max_threshold) / 2 - threshold_distance = (max_threshold - min_threshold) / 2 + threshold_gap = max_threshold - min_threshold distance = np.absolute(size - mid_threshold) - if threshold_distance > 0: - norm_dist = (distance * self.threshold) / threshold_distance - score_values = 1 - np.clip(norm_dist, 0, 1) + if threshold_gap > 0: + norm_value = threshold_gap + self.threshold = 0.5 + elif threshold_gap == 0: + norm_value = mid_threshold + self.threshold = 1.0 else: - norm_value = np.min(distance) / 0.5 - norm_dist = distance / norm_value - score_values = 1 - np.clip(norm_dist, 0, 1) + raise ValueError("threshold_gap should be non negative") + + norm_dist = distance / norm_value + score_values = 1 - np.clip(norm_dist, 0, 1) scores = pd.DataFrame(index=raw_scores.index) scores[get_score_colname(issue_type)] = score_values diff --git a/tests/test_run.py b/tests/test_run.py index 47807156..e3ea405b 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -220,7 +220,7 @@ def test_odd_size_too_large_image(generate_local_dataset_once): sqrt(width * height)*threshold(default 10),is_odd_size_issue is set to True. In this example, the median area is sqrt(300x300) so 300. An image with 3001 x 3001 has an value of 3001 so its more than 10x smaller and thus should be flagged. """ - arr = np.random.randint(low=0, high=256, size=(3001, 3001, 3), dtype=np.uint8) + arr = np.random.randint(low=0, high=256, size=(400, 400, 3), dtype=np.uint8) img = Image.fromarray(arr, mode="RGB") img.save(Path(generate_local_dataset_once / "class_0" / "larger.png")) @@ -244,7 +244,7 @@ def test_odd_size_too_small_image(generate_local_dataset_once): arr = np.random.randint( low=0, high=256, - size=(29, 29, 3), + size=(200, 200, 3), dtype=np.uint8, # 30 x 30 pixel image should be detected ) img = Image.fromarray(arr, mode="RGB") @@ -265,23 +265,26 @@ def test_custom_threshold_for_odd_size(generate_local_dataset_once): With default threshold the small image would be flagged (See test_filepath_dataset_size_to_small). However, with a custom threshold of 11 instead of 10, the imaage is within the allowed range and should not be flagged. """ - arr = np.random.randint( - low=0, - high=256, - size=(29, 29, 3), - dtype=np.uint8, # 29 x 29 pixel image should not be detected with threshold 11 - ) - img = Image.fromarray(arr, mode="RGB") - img.save(Path(generate_local_dataset_once / "class_0" / "smaller.png")) + for i in range(5): + arr = np.random.randint( + low=0, + high=256, + size=(100 * (i + 1), 100 * (i + 1), 3), + dtype=np.uint8, + ) + img = Image.fromarray(arr, mode="RGB") + img.save(Path(generate_local_dataset_once / "class_0" / f"odd_{i}.png")) files = os.listdir(generate_local_dataset_once / "class_0") filepaths = [ os.path.join(generate_local_dataset_once / "class_0", f) for f in files ] imagelab = Imagelab(filepaths=filepaths) - imagelab.find_issues({"odd_size": {"threshold": 11.0}}) + imagelab.find_issues( + {"odd_size": {"threshold": 0.5}} + ) # for this case default threshold is 1.0 assert len(imagelab.issues.columns) == 2 # Only size - assert len(imagelab.issues[imagelab.issues["is_odd_size_issue"]]) == 0 + assert len(imagelab.issues[imagelab.issues["is_odd_size_issue"]]) == 2 def test_list_default_issue_types(): From 96f014376c47e529ac48544f54059e85d8a29370 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 18:10:28 -0800 Subject: [PATCH 07/12] Fixed mypy error' --- src/cleanvision/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cleanvision/__init__.py b/src/cleanvision/__init__.py index 3fea630f..514086ab 100644 --- a/src/cleanvision/__init__.py +++ b/src/cleanvision/__init__.py @@ -1,10 +1,12 @@ import sys +from typing import Any + from cleanvision.imagelab import Imagelab as _Imagelab PYTHON_VERSION_INFO = sys.version_info -def get_version() -> str: +def get_version() -> str | Any: if sys.version_info.major >= 3 and sys.version_info.minor >= 8: import importlib.metadata From 8c73b2efddb8bc133b4ebc8358d5f7493239ab3c Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Wed, 31 Jan 2024 23:06:57 -0800 Subject: [PATCH 08/12] Fixed typing syntax --- src/cleanvision/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cleanvision/__init__.py b/src/cleanvision/__init__.py index 514086ab..c4f5fb71 100644 --- a/src/cleanvision/__init__.py +++ b/src/cleanvision/__init__.py @@ -1,12 +1,12 @@ import sys -from typing import Any +from typing import Any, Union from cleanvision.imagelab import Imagelab as _Imagelab PYTHON_VERSION_INFO = sys.version_info -def get_version() -> str | Any: +def get_version() -> Union[str, Any]: if sys.version_info.major >= 3 and sys.version_info.minor >= 8: import importlib.metadata From 459ffaf10b630498b1679d5bcd0aad068f7517e0 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Thu, 1 Feb 2024 15:16:30 -0800 Subject: [PATCH 09/12] Updated odd size title key --- src/cleanvision/imagelab.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cleanvision/imagelab.py b/src/cleanvision/imagelab.py index 17fd36b4..2c98f9e1 100644 --- a/src/cleanvision/imagelab.py +++ b/src/cleanvision/imagelab.py @@ -502,9 +502,7 @@ def _visualize( if show_id: title_info["ids"] = [f"id : {i}" for i in indices] if issue_type == IssueType.ODD_SIZE.value: - title_info["size"] = [ - f"original size: {image.size}" for image in images - ] + title_info["size"] = [f"size: {image.size}" for image in images] if images: VizManager.individual_images( From 50076abea3f7136c4a81f5f692a6ad77d7109d77 Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Thu, 1 Feb 2024 15:16:46 -0800 Subject: [PATCH 10/12] Update info['statistics'] with describe stats --- .../image_property_issue_manager.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/cleanvision/issue_managers/image_property_issue_manager.py b/src/cleanvision/issue_managers/image_property_issue_manager.py index ad6be8be..facc92ab 100644 --- a/src/cleanvision/issue_managers/image_property_issue_manager.py +++ b/src/cleanvision/issue_managers/image_property_issue_manager.py @@ -241,23 +241,20 @@ def update_info(self, agg_computations: pd.DataFrame) -> None: issue_type: self.image_properties[issue_type].name for issue_type in self.issue_types } - issue_columns = { - issue_type: [ - col - for col in agg_computations.columns - if col.startswith(property_names[issue_type] + "_") - ] - for issue_type in self.issue_types - } for issue_type in self.issue_types: - self.info["statistics"][property_names[issue_type]] = agg_computations[ - property_names[issue_type] + property_name = property_names[issue_type] + + self.info["statistics"][property_name] = agg_computations[ + property_name + ].describe() + + issue_columns = [ + col for col in agg_computations.columns if col.startswith(property_name) ] + self.info[issue_type] = ( - agg_computations[issue_columns[issue_type]] - if len(issue_columns[issue_type]) > 0 - else {} + agg_computations[issue_columns] if len(issue_columns) > 0 else {} ) def update_summary(self) -> None: From 1e311fe8fb07873091cc744a801b38743c77fd1d Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Thu, 1 Feb 2024 15:18:27 -0800 Subject: [PATCH 11/12] Updated tutorial notebook --- docs/source/tutorials/tutorial.ipynb | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/source/tutorials/tutorial.ipynb b/docs/source/tutorials/tutorial.ipynb index 233ce4bb..b5b2fdda 100644 --- a/docs/source/tutorials/tutorial.ipynb +++ b/docs/source/tutorials/tutorial.ipynb @@ -107,7 +107,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 1. Using CleanVision to detect default issue types" + "### 1. Using CleanVision to detect issues in your dataset" ] }, { @@ -124,9 +124,6 @@ "# Initialize imagelab with your dataset\n", "imagelab = Imagelab(data_path=dataset_path)\n", "\n", - "# Visualize a few sample images from the dataset\n", - "imagelab.visualize(num_images=8)\n", - "\n", "# Find issues\n", "imagelab.find_issues()" ] @@ -153,17 +150,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The main way to interface with your data is via the `Imagelab` class. This class can be used to understand the issues in your dataset at a high level (global overview) and low level (issues and quality scores for each image) as well as additional information about the dataset. It has three main attributes:\n", + "The main way to interface with your data is via the [Imagelab](https://cleanvision.readthedocs.io/en/latest/cleanvision/imagelab.html#cleanvision.imagelab.Imagelab) class. This class can be used to understand the issues in your dataset at a high level (global overview) and low level (issues and quality scores for each image) as well as additional information about the dataset. It has three main attributes:\n", + "\n", "- `Imagelab.issue_summary`\n", "- `Imagelab.issues`\n", "- `Imagelab.info`\n", "\n", "#### imagelab.issue_summary\n", - "Dataframe with global summary of all issue types detected in your dataset and the overall prevalence of each type.\n", + "This is a Dataframe containing a comprehensive summary of all detected issue types within your dataset, along with their respective prevalence levels. Each row in this summary includes the following information:\n", "\n", - "In each row:\\\n", - "`issue_type` - name of the issue\\\n", - "`num_images` - number of images of that issue type found in the dataset" + "`issue_type`: The name of the detected issue.\\\n", + "`num_images`: The number of images exhibiting the identified issue within the dataset." ] }, { @@ -301,7 +298,7 @@ "tags": [] }, "source": [ - "You can see **entropy** values for each image in the dataset as shown below." + "You can see **size** statistics for the dataset below. Here we observe, both the 25th and 75th percentile are 256 for the dataset, hence images that are further away from this range are detected as oddly sized." ] }, { @@ -310,7 +307,7 @@ "metadata": {}, "outputs": [], "source": [ - "imagelab.info[\"statistics\"][\"entropy\"]" + "imagelab.info[\"statistics\"][\"size\"]" ] }, { From ab5089529bc540445f990d957c4193063eb4383c Mon Sep 17 00:00:00 2001 From: Sanjana Garg Date: Thu, 1 Feb 2024 15:32:39 -0800 Subject: [PATCH 12/12] Fixed tests --- tests/test_run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_run.py b/tests/test_run.py index e3ea405b..7267c14e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -112,6 +112,7 @@ def test_incremental_issue_finding(generate_local_dataset, len_dataset): "brightness_perc_10", "brightness_perc_5", "brightness_perc_1", + "brightness", ] ) == set(imagelab.info["light"].keys())