OPEN-5555: Notifications for Reference Dataset Uploaded/Failure and Monitoring Data Limit Reached

Parthib committed Dec 12, 2023
1 parent 86653bc commit 24e47cd
Showing 3 changed files with 25 additions and 0 deletions.
4 changes: 4 additions & 0 deletions openlayer/__init__.py
@@ -1037,6 +1037,10 @@ def upload_reference_dataset(
         dataset_data = DatasetSchema().load(
             {"task_type": task_type.value, **dataset_config}
         )
+        dataset_data = {
+            "taskType": task_type.value,
+            **dataset_config,
+        }
         # Add default columns if not present
         if dataset_data.get("columnNames") is None:
             dataset_data["columnNames"] = utils.get_column_names(file_path)
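For context on this first change: the added lines build dataset_data as a plain dict whose keys are camelCased for the API, matching the "columnNames" default just below. A minimal sketch of the resulting payload shape; the config keys and the task-type value are placeholders, not taken from the commit:

# Sketch only: dataset_config keys and the task-type value below are
# hypothetical placeholders.
dataset_config = {"labelColumnName": "churned", "language": "en"}
task_type_value = "tabular-classification"  # assumed task_type.value

dataset_data = {
    "taskType": task_type_value,
    **dataset_config,
}
# dataset_data == {"taskType": "tabular-classification",
#                  "labelColumnName": "churned", "language": "en"}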
2 changes: 2 additions & 0 deletions openlayer/validators/commit_validators.py
@@ -30,6 +30,8 @@ class BaseCommitBundleValidator(BaseValidator, ABC):
         Whether to skip dataset validation, by default False
     use_runner : bool
         Whether to use the runner to validate the model, by default False.
+    log_file_path : Optional[str], optional
+        The path to the log file, by default None
     """
 
     def __init__(
19 changes: 19 additions & 0 deletions openlayer/validators/dataset_validators.py
@@ -38,6 +38,8 @@ class BaseDatasetValidator(BaseValidator, ABC):
         The path to the dataset file.
     dataset_df : pd.DataFrame, optional
         The dataset to validate.
+    log_file_path : str, optional
+        The path to the log file.
     """
 
     def __init__(
@@ -47,8 +49,18 @@ def __init__(
         dataset_config: Optional[Dict] = None,
         dataset_file_path: Optional[str] = None,
         dataset_df: Optional[pd.DataFrame] = None,
+        log_file_path: Optional[str] = None,
     ):
         super().__init__(resource_display_name="dataset")
+
+        if log_file_path:
+            bundle_file_handler = logging.FileHandler(log_file_path)
+            bundle_formatter = logging.Formatter(
+                "[%(asctime)s] - %(levelname)s - %(message)s"
+            )
+            bundle_file_handler.setFormatter(bundle_formatter)
+            logger.addHandler(bundle_file_handler)
+
         if dataset_df is not None and dataset_file_path:
             raise ValueError(
                 "Both dataset_df and dataset_file_path are provided."
@@ -930,6 +942,7 @@ def get_validator(
     dataset_config: Optional[Dict] = None,
     dataset_file_path: Optional[str] = None,
     dataset_df: Optional[pd.DataFrame] = None,
+    log_file_path: Optional[str] = None,
 ) -> BaseDatasetValidator:
     """Factory function to get the correct dataset validator for the task type.
@@ -945,6 +958,8 @@ def get_validator(
         The path to the dataset file.
     dataset_df : pd.DataFrame, optional
         The dataset to validate.
+    log_file_path : str, optional
+        The path to the log file.
 
     Returns
     -------
@@ -985,6 +1000,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type == tasks.TaskType.TabularRegression:
         return TabularRegressionDatasetValidator(
@@ -993,6 +1009,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type == tasks.TaskType.TextClassification:
         return TextClassificationDatasetValidator(
@@ -1001,6 +1018,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     elif task_type in [
         tasks.TaskType.LLM,
@@ -1015,6 +1033,7 @@ def get_validator(
             dataset_file_path=dataset_file_path,
             dataset_df=dataset_df,
             task_type=task_type,
+            log_file_path=log_file_path,
         )
     else:
         raise ValueError(f"Task type `{task_type}` is not supported.")
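Taken together, callers can now thread log_file_path through the factory so that validation messages for an uploaded reference dataset are captured in a file. A hedged usage sketch: the argument values are placeholders, and it assumes the factory's task_type parameter and the validator's validate() method, neither of which is shown in full in this diff:

from openlayer import tasks
from openlayer.validators import dataset_validators

validator = dataset_validators.get_validator(
    task_type=tasks.TaskType.TabularClassification,
    dataset_config={"labelColumnName": "churned"},  # placeholder config
    dataset_file_path="reference_dataset.csv",      # placeholder path
    log_file_path="validation.log",                 # new in this commit
)
failed_validations = validator.validate()  # messages also appended to validation.log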
