diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 203e62b1..6f25850c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -80,11 +80,11 @@ repos:
 
             # requirements-ml.txt
             scikit-learn>=0.23.2,
-            keras>=2.4.3,
+            'keras>=2.4.3,<3.0.0',
             rapidfuzz>=2.6.1,
-            tensorflow>=2.6.4; sys.platform != 'darwin',
-            tensorflow>=2.6.4; sys_platform == 'darwin' and platform_machine != 'arm64',
-            tensorflow-macos>=2.6.4; sys_platform == 'darwin' and platform_machine == 'arm64',
+            "tensorflow>=2.6.4,<2.15.0; sys_platform != 'darwin'",
+            "tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
+            "tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
             tqdm>=4.0.0,
 
             # requirements-reports.txt
diff --git a/dataprofiler/profilers/profiler_options.py b/dataprofiler/profilers/profiler_options.py
index 038acf80..d4bde4fa 100644
--- a/dataprofiler/profilers/profiler_options.py
+++ b/dataprofiler/profilers/profiler_options.py
@@ -4,6 +4,7 @@
 
 import abc
 import copy
+import json
 import re
 import warnings
 from typing import Any, Generic, TypeVar, cast
@@ -193,6 +194,15 @@ def __init__(self, is_enabled: bool = True) -> None:
         """
         self.is_enabled = is_enabled
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :return: str of the option properties
+        :rtype: str
+        """
+        return str(self.is_enabled)
+
     def _validate_helper(self, variable_path: str = "BooleanOption") -> list[str]:
         """
         Validate the options do not conflict and cause errors.
@@ -958,6 +968,25 @@ def __init__(
         self.cms_relative_error = cms_relative_error
         self.cms_max_num_heavy_hitters = cms_max_num_heavy_hitters
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"CategoricalOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["CategoricalOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     def _validate_helper(self, variable_path: str = "CategoricalOptions") -> list[str]:
         """
         Validate the options do not conflict and cause errors.
@@ -1182,6 +1211,25 @@ def __init__(
         )
         self.null_count: BooleanOption = BooleanOption(is_enabled=null_count)
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"RowStatisticsOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["RowStatisticsOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     def _validate_helper(
         self, variable_path: str = "RowStatisticsOptions"
     ) -> list[str]:
@@ -1228,6 +1276,25 @@ def __init__(self) -> None:
         self.max_sample_size: int | None = None
         self.data_labeler_object: BaseDataLabeler | None = None
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"DataLabelerOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["DataLabelerOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     def __deepcopy__(self, memo: dict) -> DataLabelerOptions:
         """
         Override deepcopy for data labeler object.
@@ -1370,6 +1437,25 @@ def __init__(
         self.vocab: BooleanOption = BooleanOption(is_enabled=True)
         self.words: BooleanOption = BooleanOption(is_enabled=True)
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"TextProfilerOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["TextProfilerOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     def _validate_helper(self, variable_path: str = "TextProfilerOptions") -> list[str]:
         """
         Validate the options do not conflict and cause errors.
@@ -1488,6 +1574,25 @@ def __init__(
         self.column_null_values = column_null_values
         self.sampling_ratio = sampling_ratio
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"StructuredOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["StructuredOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     @property
     def enabled_profiles(self) -> list[str]:
         """Return a list of the enabled profilers for columns."""
@@ -1638,6 +1743,25 @@ def __init__(self) -> None:
         self.text = TextProfilerOptions()
         self.data_labeler = DataLabelerOptions()
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        :vartype dict_string: dict
+        :return: str of the option properties
+        :rtype: str
+        """
+        dict_string: dict = {"UnstructuredOptions": []}
+        for iter_option in [
+            a
+            for a in dir(self)
+            if not a.startswith("__") and not callable(getattr(self, a))
+        ]:
+            dict_string["UnstructuredOptions"].append(
+                {str(iter_option): str(getattr(self, iter_option))}
+            )
+        return json.dumps(dict_string, indent=4)
+
     @property
     def enabled_profiles(self) -> list[str]:
         """Return a list of the enabled profilers."""
@@ -1715,6 +1839,24 @@ def __init__(self, presets: str = None) -> None:
             else:
                 raise ValueError("The preset entered is not a valid preset.")
 
+    def __str__(self) -> str:
+        """
+        Return a human friendly consumable output in string form.
+
+        The presets, the structured options and the unstructured options
+        are rendered in that order, separated by newlines.
+
+        :return: str of the option presets and properties
+        :rtype: str
+        """
+        # Adjacent f-strings rather than a backslash continuation inside one
+        # literal, so source indentation does not leak into the output.
+        return (
+            f"Presets: {self.presets}\n"
+            f"{self.structured_options}\n"
+            f"{self.unstructured_options}"
+        )
+
     def _complete_presets(self) -> None:
         self.set({"*.is_enabled": True})
 
diff --git a/requirements-ml.txt b/requirements-ml.txt
index 08e1312a..ff525fec 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,7 +1,7 @@
 scikit-learn>=0.23.2
-keras>=2.4.3
+keras>=2.4.3,<3.0.0
 rapidfuzz>=2.6.1
-tensorflow>=2.6.4; sys.platform != 'darwin'
-tensorflow>=2.6.4; sys_platform == 'darwin' and platform_machine != 'arm64'
-tensorflow-macos>=2.6.4; sys_platform == 'darwin' and platform_machine == 'arm64'
+tensorflow>=2.6.4,<2.15.0; sys_platform != 'darwin'
+tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'
+tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'
 tqdm>=4.0.0
diff --git a/requirements-test.txt b/requirements-test.txt
index df4be852..40f26a11 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,5 +1,5 @@
 coverage>=5.0.1
-dask>=2.29.0
+dask>=2.29.0,<2024.2.0
 fsspec>=0.3.3
 pytest>=6.0.1
 pytest-cov>=2.8.1