From 24e47cdaaf924f0550aca2fc047e6332d34207f4 Mon Sep 17 00:00:00 2001
From: Parthib Samadder
Date: Tue, 12 Dec 2023 10:58:30 -0800
Subject: [PATCH] OPEN-5555: Notifications for Reference Dataset
 Uploaded/Failure and Monitoring Data Limit Reached

---
 openlayer/__init__.py                      |  4 ++++
 openlayer/validators/commit_validators.py  |  2 ++
 openlayer/validators/dataset_validators.py | 19 +++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/openlayer/__init__.py b/openlayer/__init__.py
index 9085c0f5..187203be 100644
--- a/openlayer/__init__.py
+++ b/openlayer/__init__.py
@@ -1037,6 +1037,10 @@ def upload_reference_dataset(
         dataset_data = DatasetSchema().load(
             {"task_type": task_type.value, **dataset_config}
         )
+        dataset_data = {
+            "taskType": task_type.value,
+            **dataset_config,
+        }
         # Add default columns if not present
         if dataset_data.get("columnNames") is None:
             dataset_data["columnNames"] = utils.get_column_names(file_path)
diff --git a/openlayer/validators/commit_validators.py b/openlayer/validators/commit_validators.py
index 3ec63140..efa9ec98 100644
--- a/openlayer/validators/commit_validators.py
+++ b/openlayer/validators/commit_validators.py
@@ -30,6 +30,8 @@ class BaseCommitBundleValidator(BaseValidator, ABC):
         Whether to skip dataset validation, by default False
     use_runner : bool
         Whether to use the runner to validate the model, by default False.
+    log_file_path : Optional[str], optional
+        The path to the log file, by default None
     """
 
     def __init__(
diff --git a/openlayer/validators/dataset_validators.py b/openlayer/validators/dataset_validators.py
index a4a11a1d..440ea47d 100644
--- a/openlayer/validators/dataset_validators.py
+++ b/openlayer/validators/dataset_validators.py
@@ -38,6 +38,8 @@ class BaseDatasetValidator(BaseValidator, ABC):
         The path to the dataset file.
     dataset_df : pd.DataFrame, optional
         The dataset to validate.
+    log_file_path : str, optional
+        The path to the log file.
     """
 
     def __init__(
@@ -47,8 +49,18 @@ def __init__(
         dataset_config: Optional[Dict] = None,
         dataset_file_path: Optional[str] = None,
         dataset_df: Optional[pd.DataFrame] = None,
+        log_file_path: Optional[str] = None,
     ):
         super().__init__(resource_display_name="dataset")
+
+        if log_file_path:
+            bundle_file_handler = logging.FileHandler(log_file_path)
+            bundle_formatter = logging.Formatter(
+                "[%(asctime)s] - %(levelname)s - %(message)s"
+            )
+            bundle_file_handler.setFormatter(bundle_formatter)
+            logger.addHandler(bundle_file_handler)
+
         if dataset_df is not None and dataset_file_path:
             raise ValueError(
                 "Both dataset_df and dataset_file_path are provided."
@@ -930,6 +942,7 @@ def get_validator(
     dataset_config: Optional[Dict] = None,
     dataset_file_path: Optional[str] = None,
     dataset_df: Optional[pd.DataFrame] = None,
+    log_file_path: Optional[str] = None,
 ) -> BaseDatasetValidator:
     """Factory function to get the correct dataset validator for the task type.
 
@@ -945,6 +958,8 @@ def get_validator(
         The path to the dataset file.
     dataset_df : pd.DataFrame, optional
         The dataset to validate.
+    log_file_path : str, optional
+        The path to the log file.
 
     Returns
     -------
@@ -985,6 +1000,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type == tasks.TaskType.TabularRegression:
         return TabularRegressionDatasetValidator(
@@ -993,6 +1009,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type == tasks.TaskType.TextClassification:
         return TextClassificationDatasetValidator(
@@ -1001,6 +1018,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type in [
         tasks.TaskType.LLM,
@@ -1015,6 +1033,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     else:
         raise ValueError(f"Task type `{task_type}` is not supported.")
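
Note (not part of the patch): a minimal sketch of how the new log_file_path
plumbing would be exercised by a caller. The identifiers get_validator,
tasks.TaskType.TextClassification, dataset_config, dataset_df, and
log_file_path come straight from the diff; the dataset_config keys, the toy
dataframe, the log file name, and the validate() entry point are assumptions,
not confirmed API.

    import pandas as pd

    from openlayer import tasks
    from openlayer.validators import dataset_validators

    # Hypothetical toy dataset; a real config names the columns the
    # validators check (text column, label column, class names, etc.).
    df = pd.DataFrame({"text": ["good product", "bad product"], "label": [1, 0]})

    validator = dataset_validators.get_validator(
        task_type=tasks.TaskType.TextClassification,
        dataset_config={
            "classNames": ["negative", "positive"],  # hypothetical keys
            "textColumnName": "text",
            "labelColumnName": "label",
        },
        dataset_df=df,
        log_file_path="reference_upload.log",  # new parameter from this patch
    )
    failures = validator.validate()  # assumed BaseValidator entry point

One design note worth flagging: BaseDatasetValidator.__init__ attaches a fresh
logging.FileHandler to the module-level logger on every construction that
passes log_file_path, so repeated validations in one process will accumulate
handlers and write duplicate log lines unless the caller removes the handler
afterwards.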