Skip to content

Commit

Permalink
feat(WIP): add support for custom metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
whoseoyster committed Jun 20, 2024
1 parent a86cce5 commit 6afc952
Showing 1 changed file with 188 additions and 0 deletions.
188 changes: 188 additions & 0 deletions src/openlayer/lib/core/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Module containing the BaseMetric definition for Openlayer."""

from __future__ import annotations

import abc
import argparse
import json
import os
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Union

import pandas as pd


@dataclass
class MetricReturn:
    """The return type of the `run` method in the BaseMetric."""

    # The value of the metric.
    value: Union[float, int, bool]

    # The unit of the metric (e.g. "%", "ms"); None when unitless.
    unit: Optional[str] = None

    # Any useful metadata in a JSON serializable dict.
    meta: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Dataset:
    """A dataset object containing the configuration, data and dataset outputs path."""

    # The name of the dataset.
    name: str

    # The configuration of the dataset.
    config: dict

    # The dataset as a pandas DataFrame.
    df: pd.DataFrame

    # The path to the dataset outputs.
    output_path: str


class MetricRunner:
    """A class to run a list of metrics.

    Orchestrates the end-to-end flow: parses the CLI arguments, loads the
    openlayer.json config, loads the datasets (with the model's outputs)
    and computes each metric on every dataset.
    """

    def __init__(self):
        self.config_path: str = ""
        self.config: Dict[str, Any] = {}
        self.datasets: List[Dataset] = []
        # None means "no explicit selection" -- in that case all metrics run.
        self.selected_metrics: Optional[List[str]] = None

    def run_metrics(self, metrics: List[BaseMetric]) -> None:
        """Run a list of metrics.

        Args:
            metrics: The metric instances to compute on every loaded dataset.
        """
        # Parse arguments from the command line
        self._parse_args()

        # Load the openlayer.json file
        self._load_openlayer_json()

        # Load the datasets from the openlayer.json file
        self._load_datasets()

        # TODO: Auto-load all the metrics in the current directory

        self._compute_metrics(metrics)

    def _parse_args(self) -> None:
        """Parse command-line arguments into ``self.config_path``."""
        parser = argparse.ArgumentParser(description="Compute custom metrics.")
        parser.add_argument(
            "--config-path",
            type=str,
            required=False,
            default="",
            help="The path to your openlayer.json. Uses working dir if not provided.",
        )

        # Parse the arguments
        args = parser.parse_args()
        self.config_path = args.config_path

    def _load_openlayer_json(self) -> None:
        """Load the openlayer.json file and extract the selected metrics.

        Populates ``self.config`` and, when the config carries metric
        settings, ``self.selected_metrics``.
        """
        if not self.config_path:
            # Fall back to an openlayer.json in the current working directory.
            openlayer_json_path = os.path.join(os.getcwd(), "openlayer.json")
        else:
            openlayer_json_path = self.config_path

        with open(openlayer_json_path, "r", encoding="utf-8") as f:
            self.config = json.load(f)

        # Extract selected metrics. `.get` avoids a KeyError on settings
        # entries that carry no "selected" flag.
        if "metrics" in self.config and "settings" in self.config["metrics"]:
            self.selected_metrics = [
                metric["key"] for metric in self.config["metrics"]["settings"] if metric.get("selected")
            ]

    def _load_datasets(self) -> None:
        """Load the datasets (and the model's outputs for them) from disk.

        Raises:
            ValueError: If the config has no model, a dataset folder has no
                dataset file, or no datasets could be loaded at all.
        """
        datasets: List[Dataset] = []

        # Check first for a model. If it exists, use the output of the model
        if "model" in self.config:
            model = self.config["model"]
            # `.get` keeps a missing "datasets" key flowing into the explicit
            # "No datasets found" error below instead of a raw KeyError.
            datasets_list = self.config.get("datasets", [])
            dataset_names = [dataset["name"] for dataset in datasets_list]
            output_directory = model["outputDirectory"]
            # Read the outputs directory for dataset folders. For each, load
            # the config.json and the dataset.json files into a dict and a dataframe

            for dataset_folder in os.listdir(output_directory):
                if dataset_folder not in dataset_names:
                    continue
                dataset_path = os.path.join(output_directory, dataset_folder)
                config_path = os.path.join(dataset_path, "config.json")
                with open(config_path, "r", encoding="utf-8") as f:
                    dataset_config = json.load(f)

                # Load the dataset into a pandas DataFrame
                if os.path.exists(os.path.join(dataset_path, "dataset.csv")):
                    dataset_df = pd.read_csv(os.path.join(dataset_path, "dataset.csv"))
                elif os.path.exists(os.path.join(dataset_path, "dataset.json")):
                    dataset_df = pd.read_json(os.path.join(dataset_path, "dataset.json"), orient="records")
                else:
                    raise ValueError(f"No dataset found in {dataset_folder}.")

                datasets.append(
                    Dataset(name=dataset_folder, config=dataset_config, df=dataset_df, output_path=dataset_path)
                )
        else:
            raise ValueError("No model found in the openlayer.json file. Cannot compute metric.")

        if not datasets:
            raise ValueError("No datasets found in the openlayer.json file. Cannot compute metric.")

        self.datasets = datasets

    def _compute_metrics(self, metrics: List[BaseMetric]) -> None:
        """Compute each metric on the loaded datasets, honoring the selection."""
        for metric in metrics:
            if self.selected_metrics and metric.key not in self.selected_metrics:
                print(f"Skipping metric {metric.key} as it is not a selected metric.")
                continue
            metric.compute(self.datasets)


class BaseMetric(abc.ABC):
    """Interface for the Base metric.

    Your metric's class should inherit from this class and implement the
    compute method.
    """

    @property
    def key(self) -> str:
        """Return the key of the metric (its class name)."""
        return type(self).__name__

    def compute(self, datasets: List[Dataset]) -> None:
        """Compute the metric on the model outputs.

        Runs `compute_on_dataset` for every dataset, prints the result and
        persists it under `<dataset output path>/metrics/`.
        """
        for ds in datasets:
            result = self.compute_on_dataset(ds.config, ds.df)
            shown = result.value
            if result.unit:
                shown = f"{shown} {result.unit}"
            print(f"Metric ({self.key}) value for {ds.name}: {shown}")
            self._write_metric_return_to_file(result, os.path.join(ds.output_path, "metrics"))

    @abc.abstractmethod
    def compute_on_dataset(self, config: dict, df: pd.DataFrame) -> MetricReturn:
        """Compute the metric on a specific dataset."""

    def _write_metric_return_to_file(self, metric_return: MetricReturn, output_dir: str) -> None:
        """Write the metric return to a file named after the metric's key."""
        # Create the directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        destination = os.path.join(output_dir, f"{self.key}.json")
        with open(destination, "w", encoding="utf-8") as out:
            json.dump(asdict(metric_return), out, indent=4)
        print(f"Metric ({self.key}) value written to {output_dir}/{self.key}.json")

0 comments on commit 6afc952

Please sign in to comment.