From ed86d63d5760e506c2f2f2bbe0cc3b3707b82b5e Mon Sep 17 00:00:00 2001 From: Michal Tyrolski Date: Tue, 22 Aug 2023 13:17:39 +0200 Subject: [PATCH] Neptune, disable caching, remove unused files (#30) * versionizing * neptune --- docs/__init__.html | 30 -- docs/aggregators.html | 294 ---------------- docs/clustering.html | 474 -------------------------- docs/downloader.html | 89 ----- docs/experiment.html | 465 ------------------------- docs/experiment_test.html | 333 ------------------ docs/metric.html | 80 ----- docs/neptune_downloader.html | 351 ------------------- docs/noise.html | 59 ---- docs/paths.html | 95 ------ docs/paths_test.html | 80 ----- docs/pycco.css | 190 ----------- docs/wizzards.html | 319 ----------------- exphub/download/neptune_downloader.py | 22 +- exphub/relaunchers/__init__.py | 0 exphub/relaunchers/validator.py | 141 -------- poetry.lock | 8 +- pyproject.toml | 2 +- 18 files changed, 16 insertions(+), 3016 deletions(-) delete mode 100644 docs/__init__.html delete mode 100644 docs/aggregators.html delete mode 100644 docs/clustering.html delete mode 100644 docs/downloader.html delete mode 100644 docs/experiment.html delete mode 100644 docs/experiment_test.html delete mode 100644 docs/metric.html delete mode 100644 docs/neptune_downloader.html delete mode 100644 docs/noise.html delete mode 100644 docs/paths.html delete mode 100644 docs/paths_test.html delete mode 100644 docs/pycco.css delete mode 100644 docs/wizzards.html delete mode 100644 exphub/relaunchers/__init__.py delete mode 100644 exphub/relaunchers/validator.py diff --git a/docs/__init__.html b/docs/__init__.html deleted file mode 100644 index c24a87f..0000000 --- a/docs/__init__.html +++ /dev/null @@ -1,30 +0,0 @@ - - - - - __init__.py - - - -
-
-
-

__init__.py

-
-
-
-
-
- # -
- -
-
-

-
-
-
-
-
-
- diff --git a/docs/aggregators.html b/docs/aggregators.html deleted file mode 100644 index 3208f92..0000000 --- a/docs/aggregators.html +++ /dev/null @@ -1,294 +0,0 @@ - - - - - aggregators.py - - - -
-
-
-

aggregators.py

-
-
-
-
-
- # -
- -
-
-
import pandas as pd
-from typing import List, Any
-import functools
-
-import functools
-import pandas as pd
-from typing import Any, List
-
-
-
-
-
-
- # -
-

A class that represents an aggregator function to be applied to a DataFrame.

-
-
-
class Aggregator:
-
-
-
-
-
-
- # -
-

An Aggregator object is a callable that takes a DataFrame and an optional inplace -parameter and returns the modified DataFrame with a new column appended to it. The -new column’s values are computed using the aggregator function specified in the -Aggregator object’s constructor.

-
-
-
-
-
-
-
-
-
- # -
-

Constructs an Aggregator object.

-
-
-
    def __init__(self, fn, label: Any) -> None:
-
-
-
-
-
-
- # -
-

Args: - fn (function): The aggregator function that will be applied to the - DataFrame. The function should take a DataFrame as input and return - a Series or a DataFrame. - label (Any): The label to be used for the new column in the DataFrame.

-
-
-
        self.fn = fn
-        self.label = label
-
-
-
-
-
-
- # -
-

Modifies a DataFrame by adding a new column computed using the aggregator function.

-
-
-
    def __call__(self, incoming_df: pd.DataFrame, inplace: bool = False) -> Any:
-
-
-
-
-
-
- # -
-

Args: - incoming_df (pd.DataFrame): The DataFrame to be modified. - inplace (bool, optional): If True, the incoming DataFrame will be modified in place. - If False, a copy of the DataFrame will be created and modified. Defaults to False.

-

Returns: - Any: The modified DataFrame.

-
-
-
        df = incoming_df.copy() if not inplace else incoming_df
-        df[self.label] = self.fn(df)
-        return df
-
-
-
-
-
-
- # -
-

A class that represents a chain of Aggregator objects.

-
-
-
class AggregatorChain:
-
-
-
-
-
-
- # -
-

An AggregatorChain object is a callable that takes a DataFrame and an optional -inplace parameter and returns the modified DataFrame after applying all the -Aggregator objects in the chain, in the order they were added.

-
-
-
-
-
-
-
-
-
- # -
-

Constructs an AggregatorChain object.

-
-
-
    def __init__(self, aggs: List[Aggregator]) -> None:
-
-
-
-
-
-
- # -
-

Args: - aggs (List[Aggregator]): A list of Aggregator objects to be applied in the chain.

-
-
-
        self.aggs = aggs
-
-
-
-
-
-
- # -
-

Modifies a DataFrame by applying a chain of Aggregator objects.

-
-
-
    def __call__(self, df: pd.DataFrame, inplace=False) -> Any:
-
-
-
-
-
-
- # -
-

Args: - df (pd.DataFrame): The DataFrame to be modified. - inplace (bool, optional): If True, the incoming DataFrame will be modified in place. - If False, a copy of the DataFrame will be created and modified. Defaults to False.

-

Returns: - Any: The modified DataFrame.

-
-
-
        df_n = df.copy() if not inplace else df
-        return functools.reduce(lambda x, y: y(x, inplace=False), self.aggs, df_n)
-
-
-
-
-
-
- # -
-

Returns a list of labels for the new columns added

-
-
-
    def labels(self) -> List[str]:
-
-
-
-
-
-
- # -
- -
-
-
        return list(map(lambda x: x.label, self.aggs))
-
-
-
-
-
-
- # -
-

Adds another AggregatorChain object to the current one.

-
-
-
    def __add__(self, other: 'AggregatorChain') -> 'AggregatorChain':
-
-
-
-
-
-
- # -
-

Args: - other (AggregatorChain): The AggregatorChain object to be added.

-

Returns: - AggregatorChain: A new AggregatorChain object that is the result of adding the two - AggregatorChain objects.

-
-
-
        return AggregatorChain(self.aggs + other.aggs)
-
-
-
-
-
-
- # -
-

A class that stores a collection of predefined Aggregator chains.

-
-
-
class Vault:
-
-
-
-
-
-
- # -
-

The Vault class provides a convenient way to retrieve predefined -Aggregator chains by name. The available Aggregator chains are stored as -class attributes.

-
-
-
    MEAN = AggregatorChain([Aggregator(lambda x: x.mean(axis=1, numeric_only=True), 'mean')])
-    _NO_AGGR = AggregatorChain([Aggregator(lambda x: x.mean(axis=1, numeric_only=True), 'no_aggr')])
-    MEAN_STD = AggregatorChain([
-        Aggregator(lambda df: df.mean(axis=1, numeric_only=True), 'mean'),
-        Aggregator(lambda df: df.mean(axis=1, numeric_only=True) - df.std(axis=1), 'mean_std_minus'),
-        Aggregator(lambda df: df.mean(axis=1, numeric_only=True) + df.std(axis=1), 'mean_std_plus'),
-    ])
-    MIN = AggregatorChain([Aggregator(lambda x: x.min(axis=1, numeric_only=True), 'min')])
-    MAX = AggregatorChain([Aggregator(lambda x: x.max(axis=1, numeric_only=True), 'max')])
-    SUM = AggregatorChain([Aggregator(lambda x: x.sum(axis=1, numeric_only=True), 'sum')])
-    MEDIAN = AggregatorChain([Aggregator(lambda x: x.median(axis=1, numeric_only=True), 'median')])
-    VAR = AggregatorChain([Aggregator(lambda x: x.var(axis=1, numeric_only=True), 'var')])
-    STD = AggregatorChain([Aggregator(lambda x: x.std(axis=1, numeric_only=True), 'std')])
-
-
-
-
-
-
- diff --git a/docs/clustering.html b/docs/clustering.html deleted file mode 100644 index 1a329db..0000000 --- a/docs/clustering.html +++ /dev/null @@ -1,474 +0,0 @@ - - - - - clustering.py - - - -
-
-
-

clustering.py

-
-
-
-
-
- # -
- -
-
-
from exphub.metrics.metric import Metric
-from abc import abstractmethod
-from typing import Any
-from sklearn.metrics import (rand_score, adjusted_rand_score, mutual_info_score, normalized_mutual_info_score,
-                             adjusted_mutual_info_score, fowlkes_mallows_score, silhouette_score,
-                             calinski_harabasz_score, davies_bouldin_score)
-
-
-
-
-
-
- # -
-

An abstract base class for clustering extrinsic metrics. Extrinsic metrics evaluate the -quality of a clustering algorithm by comparing its output to a ground truth clustering.

-
-
-
class ClusterExtrinsicMetric(Metric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __init__(self) -> None:
-        super().__init__()
-
-
-
-
-
-
- # -
-

An abstract base class for clustering intrinsic metrics. Intrinsic metrics evaluate the -quality of a clustering algorithm without reference to a ground truth clustering.

-
-
-
    @abstractmethod
-    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        pass
-
-
-class ClusterIntrinsicMetric(Metric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __init__(self) -> None:
-        super().__init__()
-
-
-
-
-
-
- # -
-

Calculates the Rand Index, a measure of the similarity between two clusterings.

-
-
-
    @abstractmethod
-    def __call__(self, embeddings, labels, **kwargs) -> Any:
-        pass
-
-
-class RandIndex(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return rand_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Adjusted Rand Index, a measure of the similarity between two clusterings -that is adjusted for chance.

-
-
-
class AdjustedRandIndex(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return adjusted_rand_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Mutual Information Score, a measure of the similarity between two clusterings -based on the amount of shared information.

-
-
-
class MutualInfoScore(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return mutual_info_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Normalized Mutual Information Score, a measure of the similarity between -two clusterings based on the normalized amount of shared information.

-
-
-
class NormalizedMutualInfoScore(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return normalized_mutual_info_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Adjusted Mutual Information Score, a measure of the similarity between -two clusterings based on the amount of shared information, adjusted for chance.

-
-
-
class AdjustedMutualInfoScore(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return adjusted_mutual_info_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Fowlkes-Mallows Score, a measure of the similarity between two clusterings -based on the geometric mean of precision and recall.

-
-
-
class FowlkesMallowsScore(ClusterExtrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, y_pred, y_gt, **kwargs) -> Any:
-        return fowlkes_mallows_score(y_gt, y_pred)
-
-
-
-
-
-
- # -
-

Calculates the Silhouette Score, a measure of how similar an object is to its own cluster -compared to other clusters. A higher score indicates better clustering quality.

-
-
-
class SilhouetteScore(ClusterIntrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, embeddings, labels, **kwargs) -> Any:
-        if len(embeddings) != len(labels):
-            raise ValueError(
-                f"Embeddings and cluster y_gt must have the same length. Got {len(embeddings)} and {len(labels)}")
-        return silhouette_score(embeddings, labels)
-
-
-
-
-
-
- # -
-

Calculates the Calinski-Harabasz Score, also known as the Variance Ratio Criterion. This score -is a measure of cluster dispersion, where a higher value indicates better clustering quality. -The score is defined as the ratio of the between-cluster dispersion to the within-cluster dispersion.

-
-
-
class CalinskiHarabaszScore(ClusterIntrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, embeddings, labels, **kwargs) -> Any:
-        if len(embeddings) != len(labels):
-            raise ValueError(
-                f"Embeddings and cluster y_gt must have the same length. Got {len(embeddings)} and {len(labels)}")
-        return calinski_harabasz_score(embeddings, labels)
-
-
-
-
-
-
- # -
-

Calculates the Davies-Bouldin Score, a measure of cluster quality based on the average -similarity between clusters. A lower score indicates better clustering quality. -The score is defined as the average of the maximum similarity between each cluster and -all other clusters, where similarity is the ratio of within-cluster distances to between-cluster distances.

-
-
-
class DaviesBouldinScore(ClusterIntrinsicMetric):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
- -
-
-
    def __call__(self, embeddings, labels, **kwargs) -> Any:
-        if len(embeddings) != len(labels):
-            raise ValueError(
-                f"Embeddings and cluster y_gt must have the same length. Got {len(embeddings)} and {len(labels)}")
-        return davies_bouldin_score(embeddings, labels)
-
-
-
-
-
-
- diff --git a/docs/downloader.html b/docs/downloader.html deleted file mode 100644 index 1d3b372..0000000 --- a/docs/downloader.html +++ /dev/null @@ -1,89 +0,0 @@ - - - - - downloader.py - - - -
-
-
-

downloader.py

-
-
-
-
-
- # -
- -
-
-
from abc import ABC, abstractmethod
-import pandas as pd
-
-from exphub.download.experiment import Experiment
-
-
-
-
-
-
- # -
-

An abstract base class for a data downloader.

-

This class should be subclassed to create custom downloaders for specific data sources.

-
-
-
class Downloader(ABC):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
-

Downloads data and returns it as an Experiment instance.

-

This method should be implemented by subclasses to define the data downloading logic.

-

Args: - args: Variable-length arguments specific to the downloader implementation. - *kwargs: Arbitrary keyword arguments specific to the downloader implementation.

-

Returns: - Experiment: An Experiment instance containing the downloaded data.

-
-
-
    @abstractmethod
-    def download(self, *args, **kwargs) -> Experiment:
-
-
-
-
-
-
- # -
- -
-
-
        pass
-
-
-
-
-
-
- diff --git a/docs/experiment.html b/docs/experiment.html deleted file mode 100644 index 1576f9d..0000000 --- a/docs/experiment.html +++ /dev/null @@ -1,465 +0,0 @@ - - - - - experiment.py - - - -
-
-
-

experiment.py

-
-
-
-
-
- # -
- -
-
-
from dataclasses import dataclass, field
-from itertools import product
-from typing import Dict, List
-import pandas as pd
-
-
-
-
-
-
- # -
-

A class representing an experiment with its parameters and series.

-

Attributes: - params (pd.DataFrame): A DataFrame containing the parameters of the experiment. - series (Dict[str, pd.DataFrame]): A dictionary mapping metric names to DataFrames.

-
-
-
@dataclass
-class Experiment:
-
-
-
-
-
-
- # -
- -
-
-
    params: pd.DataFrame
-    series: field(default_factory=dict)  # metric_name -> df
-
-
-
-
-
-
- # -
- -
-
-
    def __str__(self) -> str:
-        return f'Experiment Instance\n{len(self.params_names)} parameters: {self.params_names}\n including...\n\t * Attributes: {self.attributes_names}\n\t * Series: {self.series_names}'
-
-
-
-
-
-
- # -
-

Returns a new Experiment instance with a subset of the parameters.

-

Args: - params_names_to_keep (List[str]): A list of the names of the parameters to keep.

-

Returns: - Experiment: A new Experiment instance with a subset of the parameters.

-
-
-
    def subset_params(self, params_names_to_keep: List[str]) -> 'Experiment':
-
-
-
-
-
-
- # -
- -
-
-
        attributes_params = [p for p in params_names_to_keep if p in self.attributes_names]
-        series_params = [p for p in params_names_to_keep if p in self.series_names]
-
-        if any((p not in self.params_names and p not in self.attributes_names) for p in params_names_to_keep):
-            raise ValueError(f'Invalid parameter name. Valid names are {self.params_names}')
-
-        new_series = {s: self.series[s] for s in series_params}
-        new_params = self.params[attributes_params + series_params]
-
-        return Experiment(new_params, new_series)
-
-
-
-
-
-
- # -
-

Returns a new Experiment instance with a subset of the runs.

-

Args: - runs_ids_to_keep (List[str]): A list of the IDs of the runs to keep.

-

Returns: - Experiment: A new Experiment instance with a subset of the runs.

-
-
-
    def subset_runs(self, runs_ids_to_keep: List[str]) -> 'Experiment':
-
-
-
-
-
-
- # -
- -
-
-
        new_params = self.params[self.params[self.id_column_name].isin(runs_ids_to_keep)]
-        new_series = {s: self.series[s][runs_ids_to_keep] for s in self.series_names}
-
-        return Experiment(new_params, new_series)
-
-
-
-
-
-
- # -
- -
-
-
    def without(self, params_names_to_drop: List[str]) -> 'Experiment':
-        all_params = self.params_names
-        return self.subset_params([p for p in all_params if p not in params_names_to_drop])
-
-
-
-
-
-
- # -
-

Returns the names of the series.

-

Returns: - List[str]: A list of the names of the series.

-
-
-
    @property
-    def series_names(self) -> List[str]:
-
-
-
-
-
-
- # -
- -
-
-
        return list(self.series.keys())
-
-
-
-
-
-
- # -
-

Returns the names of the attributes.

-

Returns: - List[str]: A list of the names of the attributes.

-
-
-
    @property
-    def attributes_names(self) -> List[str]:
-
-
-
-
-
-
- # -
- -
-
-
        return [c for c in self.params.columns if c not in self.series_names]
-
-
-
-
-
-
- # -
-

Returns the names of the parameters.

-

Returns: - List[str]: A list of the names of the parameters.

-
-
-
    @property
-    def params_names(self) -> List[str]:
-
-
-
-
-
-
- # -
- -
-
-
        return self.params.columns.tolist()
-
-
-
-
-
-
- # -
-

Returns the name of the ID column.

-

Returns: - str: The name of the ID column.

-
-
-
    @property
-    def id_column_name(self) -> str:
-
-
-
-
-
-
- # -
- -
-
-
        return 'sys/id' if 'sys/id' in self.params.columns else 'id'
-
-
-
-
-
-
- # -
-

Filters the experiment based on the given conditions.

-

Args: - conditions (list): A list of functions to filter the experiment’s parameters.

-

Returns: - Experiment: A new Experiment instance with filtered parameters and series.

-
-
-
    def filter_via_hyperparams(self, conditions: list) -> 'Experiment':
-
-
-
-
-
-
- # -
- -
-
-
        df_meta = self.params.copy()
-        for fn in conditions:
-            df_meta = df_meta[fn(df_meta)]
-
-
-
-
-
-
- # -
-

If no rows left, return empty experiment

-
-
-
        if len(df_meta) == 0:
-            return Experiment(pd.DataFrame(), {})
-
-
-
-
-
-
- # -
-

Filter series columns only if they are present in the meta df

-
-
-
        series = {}
-
-
-
-
-
-
- # -
-

Shorten compatibility

-
-
-
        id_col = 'sys/id' if 'sys/id' in df_meta.columns else 'id'
-
-        for metric_name, df in self.series.items():
-
-
-
-
-
-
- # -
-

Initialize new series from index

-
-
-
            new_series = pd.DataFrame(index=df.index)
-            new_series.index.name = df.index.name
-
-            for col in df.columns:
-                if col.split('_')[-1] in df_meta[id_col].values:
-                    new_series[col] = df[col].copy()
-
-            series[metric_name] = new_series
-
-        return Experiment(df_meta, series)
-
-
-
-
-
-
- # -
-

Splits the experiment into sub-experiments based on unique combinations of values in the specified columns.

-

Args: - columns (List[str]): A list of column names to split the experiment by.

-

Returns: - Dict[str, Experiment]: A dictionary mapping split descriptions to sub-experiments.

-
-
-
    def split_by_columns(self, columns: List[str]) -> Dict[str, 'Experiment']:
-
-
-
-
-
-
- # -
- -
-
-
        unique_values_by_columns = {col: self.params[col].unique() for col in columns}
-        splits = {}
-        import copy
-        for values in product(*unique_values_by_columns.values()):
-            split_describtion = '\n'.join([f'{col} = {val}' for col, val in zip(columns, values)])
-            experiment = copy.deepcopy(self)
-            for col, val in zip(columns, values):
-                experiment = experiment.filter_via_hyperparams([lambda df: df[col] == val])
-            splits[split_describtion] = experiment
-
-
-
-
-
-
- # -
-

Final filtering to remove empty experiments

-
-
-
        splits = {k: v for k, v in splits.items() if len(v.params) > 0}
-
-        return splits
-
-
-
-
-
-
- # -
-

Merges the experiment with another experiment.

-

Args: - other (Experiment): The experiment to merge with.

-

Returns: - Experiment: A new Experiment instance with merged parameters and series.

-
-
-
    def merge(self, other: 'Experiment') -> 'Experiment':
-
-
-
-
-
-
- # -
- -
-
-
        if self.params_names != other.params_names:
-            raise ValueError(
-                f'Cannot merge experiments with different parameters. Parameters are {self.params_names} and {other.params_names}'
-            )
-
-        new_params = pd.concat([self.params, other.params])
-        new_series = {s: pd.concat([self.series[s], other.series[s]]) for s in self.series_names}
-
-        return Experiment(new_params, new_series)
-
-
-
-
-
-
- # -
-

Drops all runs that have NaN values in any of the series params.

-
-
-
    def drop_runs_with_nan(self) -> 'Experiment':
-
-
-
-
-
-
- # -
- -
-
-
        return self.filter_via_hyperparams([lambda df: ~df.isnull().any(axis=1)])
-
-
-
-
-
-
- diff --git a/docs/experiment_test.html b/docs/experiment_test.html deleted file mode 100644 index 42d8b18..0000000 --- a/docs/experiment_test.html +++ /dev/null @@ -1,333 +0,0 @@ - - - - - experiment_test.py - - - -
-
-
-

experiment_test.py

-
-
-
-
-
- # -
- -
-
-
import pytest
-import pandas as pd
-import numpy as np
-from exphub.download.experiment import Experiment
-
-
-
-
-
-
- # -
-

Create a sample Experiment instance for testing

-
-
-
params_data = {
-    'id': ['run1', 'run2', 'run3'],
-    'attr1': [1, 2, 3],
-    'attr2': [2, 3, 1],
-    'metric1': [0.1, 0.2, 0.3],
-    'metric2': [0.3, 0.2, 0.1],
-}
-params_df = pd.DataFrame(params_data)
-sample_series = {
-    'metric1': pd.DataFrame(data=[[0.1, 0.2, 0.3]], columns=['run1', 'run2', 'run3']),
-    'metric2': pd.DataFrame(data=[[0.3, 0.2, 0.1]], columns=['run1', 'run2', 'run3']),
-}
-sample_experiment = Experiment(params=params_df, series=sample_series)
-
-
-
-
-
-
- # -
- -
-
-
@pytest.mark.parametrize(
-    "params_names_to_keep,expected",
-    [
-        (['attr1'], 1),
-        (['attr2'], 1),
-        (['metric1'], 1),
-        (['attr1', 'attr2'], 2),
-        (['attr1', 'metric1'], 2),
-    ],
-)
-def test_subset_params(params_names_to_keep, expected):
-    result = sample_experiment.subset_params(params_names_to_keep)
-    assert len(result.params.columns) == expected
-
-
-@pytest.mark.parametrize(
-    "runs_ids_to_keep,expected",
-    [
-        (['run1'], 1),
-        (['run2'], 1),
-        (['run1', 'run2'], 2),
-    ],
-)
-def test_subset_runs(runs_ids_to_keep, expected):
-    result = sample_experiment.subset_runs(runs_ids_to_keep)
-    assert len(result.series[sample_experiment.series_names[0]].columns) == expected
-
-
-@pytest.mark.parametrize(
-    "params_names_to_drop,expected",
-    [
-        (['attr1'], 4),
-        (['attr2'], 4),
-        (['metric1'], 4),
-        (['attr1', 'attr2'], 3),
-        (['attr1', 'metric1'], 3),
-    ],
-)
-def test_without(params_names_to_drop, expected):
-    result = sample_experiment.without(params_names_to_drop)
-    assert len(result.params.columns) == expected
-
-
-@pytest.mark.parametrize(
-    "conditions,expected",
-    [
-        ([lambda df: df['attr1'] > 1], 2),
-        ([lambda df: df['attr2'] > 1], 2),
-        ([lambda df: df['attr1'] > 1, lambda df: df['attr2'] > 1], 1),
-    ],
-)
-def test_filter_via_hyperparams(conditions, expected):
-    result = sample_experiment.filter_via_hyperparams(conditions)
-    assert len(result.params) == expected
-
-
-@pytest.mark.parametrize(
-    "columns,expected",
-    [
-        (['attr1'], 3),
-        (['attr2'], 3),
-    ],
-)
-def test_split_by_columns(columns, expected):
-    result = sample_experiment.split_by_columns(columns)
-    assert len(result) == expected
-
-
-@pytest.mark.parametrize(
-    "params_names_to_keep,expected_columns",
-    [
-        (['attr1'], ['attr1']),
-        (['attr2'], ['attr2']),
-        (['metric1'], ['metric1']),
-        (['attr1', 'attr2'], ['attr1', 'attr2']),
-        (['attr1', 'metric1'], ['attr1', 'metric1']),
-    ],
-)
-def test_subset_params2(params_names_to_keep, expected_columns):
-    result = sample_experiment.subset_params(params_names_to_keep)
-    assert len(result.params.columns) == len(expected_columns)
-    assert set(result.params.columns) == set(expected_columns)
-
-
-@pytest.mark.parametrize(
-    "runs_ids_to_keep,expected_rows",
-    [
-        (['run1'], ['run1']),
-        (['run2'], ['run2']),
-        (['run1', 'run2'], ['run1', 'run2']),
-    ],
-)
-def test_subset_runs2(runs_ids_to_keep, expected_rows):
-    result = sample_experiment.subset_runs(runs_ids_to_keep)
-    assert len(result.params) == len(expected_rows)
-    assert set(result.params['id'].values) == set(expected_rows)
-
-
-@pytest.mark.parametrize(
-    "params_names_to_drop,expected_columns",
-    [
-        (['attr1'], ['id', 'attr2', 'metric1', 'metric2']),
-        (['attr2'], ['id', 'attr1', 'metric1', 'metric2']),
-        (['metric1'], ['id', 'attr1', 'attr2', 'metric2']),
-        (['attr1', 'attr2'], ['id', 'metric1', 'metric2']),
-        (['attr1', 'metric1'], ['id', 'attr2', 'metric2']),
-    ],
-)
-def test_without2(params_names_to_drop, expected_columns):
-    result = sample_experiment.without(params_names_to_drop)
-    assert len(result.params.columns) == len(expected_columns)
-    assert set(result.params.columns) == set(expected_columns)
-
-
-def create_test_experiment():
-    params = pd.DataFrame({'param1': [1, 2], 'param2': [3, 4]})
-    series = {'metric1': pd.DataFrame({'value': [5, 6]}), 'metric2': pd.DataFrame({'value': [7, 8]})}
-    return Experiment(params, series)
-
-
-def test_merge_success():
-    exp1 = create_test_experiment()
-    exp2 = create_test_experiment()
-
-    merged_exp = exp1.merge(exp2)
-
-    assert len(merged_exp.params) == 4
-    assert len(merged_exp.series['metric1']) == 4
-    assert len(merged_exp.series['metric2']) == 4
-
-
-def test_merge_failure_different_params():
-    exp1 = create_test_experiment()
-
-    params = pd.DataFrame({'param3': [1, 2], 'param4': [3, 4]})
-    series = {'metric1': pd.DataFrame({'value': [5, 6]}), 'metric2': pd.DataFrame({'value': [7, 8]})}
-    exp2 = Experiment(params, series)
-
-    with pytest.raises(ValueError) as excinfo:
-        exp1.merge(exp2)
-
-    assert "Cannot merge experiments with different parameters" in str(excinfo.value)
-
-
-def test_merge_correct_dfs():
-    exp1 = create_test_experiment()
-    exp2 = create_test_experiment()
-
-    merged_exp = exp1.merge(exp2)
-
-
-
-
-
-
- # -
-

Check if params DataFrame is correct

-
-
-
    expected_params = pd.DataFrame({'param1': [1, 2, 1, 2], 'param2': [3, 4, 3, 4]})
-
-    pd.testing.assert_frame_equal(merged_exp.params.reset_index(drop=True), expected_params.reset_index(drop=True))
-
-
-
-
-
-
- # -
-

Check if metric1 DataFrame is correct

-
-
-
    expected_metric1 = pd.DataFrame({'value': [5, 6, 5, 6]})
-    pd.testing.assert_frame_equal(merged_exp.series['metric1'].reset_index(drop=True),
-                                  expected_metric1.reset_index(drop=True))
-
-
-
-
-
-
- # -
-

Check if metric2 DataFrame is correct

-
-
-
    expected_metric2 = pd.DataFrame({'value': [7, 8, 7, 8]})
-    pd.testing.assert_frame_equal(merged_exp.series['metric2'].reset_index(drop=True),
-                                  expected_metric2.reset_index(drop=True))
-
-
-
-
-
-
- # -
- -
-
-
def create_test_experiment_with_nan():
-    params = pd.DataFrame({
-        'id': [0, 1],
-        'param1': [1, 2],
-        'param2': [3, 4],
-        'metric1': [5, float('nan')],
-        'metric2': [7, 8]
-    })
-    series = {}
-    return Experiment(params, series)
-
-
-
-
-
-
- # -
- -
-
-
def test_drop_runs_with_nan():
-    exp = create_test_experiment_with_nan()
-    exp_no_nan = exp.drop_runs_with_nan()
-
-
-
-
-
-
- # -
-

Check if the resulting Experiment has the correct number of runs

-
-
-
    assert len(exp_no_nan.params) == 1
-
-
-
-
-
-
- # -
-

Check if the remaining run has the correct ID

-
-
-
    assert exp_no_nan.params['id'].iloc[0] == 0
-
-
-
-
-
-
- # -
-

Check if the remaining run has the correct values

-
-
-
    assert exp_no_nan.params['param1'].iloc[0] == 1
-    assert exp_no_nan.params['param2'].iloc[0] == 3
-    assert exp_no_nan.params['metric1'].iloc[0] == 5
-    assert exp_no_nan.params['metric2'].iloc[0] == 7
-
-
-
-
-
-
- diff --git a/docs/metric.html b/docs/metric.html deleted file mode 100644 index 3d97458..0000000 --- a/docs/metric.html +++ /dev/null @@ -1,80 +0,0 @@ - - - - - metric.py - - - -
-
-
-

metric.py

-
-
-
-
-
- # -
- -
-
-
from abc import ABC, abstractmethod
-from typing import Any
-
-
-
-
-
-
- # -
- -
-
-
class Metric(ABC):
-
-
-
-
-
-
- # -
- -
-
-
    def __init__(self) -> None:
-        super().__init__()
-
-
-
-
-
-
- # -
- -
-
-
    @abstractmethod
-    def __call__(self, **kwargs: Any) -> Any:
-        return super().__call__(**kwargs)
-
-
-class MetricsCollection:
-
-    def __init__(self, metrics: list[Metric]) -> None:
-        super().__init__()
-        self.metrics = metrics
-
-    def __call__(self, **kwargs) -> Any:
-        return {metric.__class__.__name__: metric(**kwargs) for metric in self.metrics}
-
-
-
-
-
-
- diff --git a/docs/neptune_downloader.html b/docs/neptune_downloader.html deleted file mode 100644 index fadb365..0000000 --- a/docs/neptune_downloader.html +++ /dev/null @@ -1,351 +0,0 @@ - - - - - neptune_downloader.py - - - -
-
-
-

neptune_downloader.py

-
-
-
-
-
- # -
- -
-
-
from exphub.download.downloader import Downloader
-import pandas as pd
-from typing import Optional, Union, List
-import os
-import neptune.new as neptune
-from exphub.download.experiment import Experiment
-from exphub.utils.noise import Suppressor
-from exphub.utils.paths import shorten_paths
-
-
-
-
-
-
- # -
-

A NeptuneDownloader class for downloading experiment data from Neptune.ai.

-

Attributes: - project_name (str): The name of the Neptune project. - api_token (Optional[str]): The Neptune API token. If not provided, it should be set as an environment variable.

-
-
-
class NeptuneDownloader(Downloader):
-
-
-
-
-
-
- # -
- -
-
-
    NEPTUNE_API_TOKEN = 'NEPTUNE_API_TOKEN'
-
-
-
-
-
-
- # -
-

Download experiment data from Neptune.ai with the specified filters and settings.

-

Args: - id (Optional[Union[str, List[str]]]): A list of experiment IDs to download. - state (Optional[Union[str, List[str]]]): A list of experiment states to download. - owner (Optional[Union[str, List[str]]]): A list of experiment owners to download. - tag (Optional[Union[str, List[str]]]): A list of experiment tags to download. - attributes (Optional[List[str]]): A list of experiment attributes to download. - short_names (bool): Whether to shorten the column names in the resulting data. Defaults to True. - series (List[str]): A list of experiment series to download.

-

Returns: - Experiment: An Experiment object containing the downloaded data.

-
-
-
    def __init__(self, project_name: str, api_token: Optional[str] = None):
-
-
-
-
-
-
- # -
- -
-
-
        self.api_token = api_token
-        self.project_name = project_name
-        if self.api_token is None:
-            if NeptuneDownloader.NEPTUNE_API_TOKEN not in os.environ:
-                raise ValueError(f'Environment variable {NeptuneDownloader.NEPTUNE_API_TOKEN} not found.')
-            self.api_token = os.environ[NeptuneDownloader.NEPTUNE_API_TOKEN]
-        else:
-            self.api_token = api_token
-            os.environ[NeptuneDownloader.NEPTUNE_API_TOKEN] = api_token
-        self.project = neptune.init_project(project=self.project_name, mode="read-only", api_token=self.api_token)
-
-
-
-
-
-
- # -
- -
-
-
    def download(self,
-                 id: Optional[Union[str, List[str]]] = None,
-                 state: Optional[Union[str, List[str]]] = None,
-                 owner: Optional[Union[str, List[str]]] = None,
-                 tag: Optional[Union[str, List[str]]] = None,
-                 attributes: Optional[List[str]] = None,
-                 short_names: bool = True,
-                 series: List[str] = []) -> Experiment:
-        if all([id is None, state is None, owner is None, tag is None]):
-            raise ValueError('At least one of id, state, owner, or tag must be provided.')
-        columns = [*attributes, *series]
-        params = self.project.fetch_runs_table(owner=owner, id=id, state=state, tag=tag, columns=columns).to_pandas()
-        series = {}
-        for series_col in series:
-            series[series_col] = self._download_series(series_col, id=id, state=state, owner=owner, tag=tag)
-
-        self.short_names = short_names
-
-        if short_names:
-
-
-
-
-
-
- # -
-

Modify params in place

-
-
-
            meta_long2short = shorten_paths(params)
-            params.rename(columns=meta_long2short, inplace=True)
-
-
-
-
-
-
- # -
-

Modify series in place

-
-
-
            short_df_series = {}
-            for series_col, df in series.items():
-                long2short = shorten_paths(df)
-                short_df_series[meta_long2short[series_col]] = df.rename(columns=long2short)
-            series = short_df_series
-
-        return Experiment(params, series)
-
-
-
-
-
-
- # -
-

Downloads a specified series of data from Neptune.ai based on filtering criteria.

-

Args: - series_column (Union[List[str], str]): The name of the series to download. - id (Optional[Union[str, List[str]]]): The run ID(s) to filter by. - state (Optional[Union[str, List[str]]]): The run state(s) to filter by. - owner (Optional[Union[str, List[str]]]): The run owner(s) to filter by. - tag (Optional[Union[str, List[str]]]): The run tag(s) to filter by.

-

Returns: - pd.DataFrame: A pandas DataFrame containing the downloaded series data.

-
-
-
    def _download_series(self,
-                         series_column: Union[List[str], str],
-                         id: Optional[Union[str, List[str]]] = None,
-                         state: Optional[Union[str, List[str]]] = None,
-                         owner: Optional[Union[str, List[str]]] = None,
-                         tag: Optional[Union[str, List[str]]] = None) -> pd.DataFrame:
-
-
-
-
-
-
- # -
- -
-
-
        if all([id is None, state is None, owner is None, tag is None]):
-            raise ValueError('At least one of id, state, owner, or tag must be provided.')
-
-        ids = self.project.fetch_runs_table(
-            owner=owner, id=id, state=state, tag=tag, columns='sys/id').to_pandas()['sys/id'].values
-
-
-
-
-
-
- # -
-

Run initialization

-
-
-
        runs = [
-            Suppressor.exec_no_stdout(
-                neptune.init_run, project=self.project_name, with_id=run_id, mode="read-only", api_token=self.api_token)
-            for run_id in ids
-        ]
-
-
-
-
-
-
- # -
- -
-
-
        def _fetch_values(col_label):
-            if isinstance(col_label, list):
-                assert len(col_label) == 1
-                col_label = col_label[0]
-
-
-
-
-
-
- # -
-

Fetching values and counting the number of values

-
-
-
            id2value = {}
-            missing = 0
-            for id, run in zip(ids, runs):
-                try:
-                    id2value[id] = Suppressor.exec_no_stdout(run[col_label].fetch_values, include_timestamp=False)
-                except neptune.exceptions.NeptuneException:
-                    print(f'[WARNING] Run {id} does not have a column named {col_label}')
-                    missing += 1
-            if missing == len(ids):
-                raise ValueError(f'No runs have a column named {col_label}')
-
-            df = pd.DataFrame({})
-            for id, value in id2value.items():
-                df[id] = value['value']
-
-            return df
-
-        return _fetch_values(series_column)
-
-
-
-
-
-
- # -
- -
-
-
class NeptuneDownloaderParentExpLinked(NeptuneDownloader):
-
-
-
-
-
-
- # -
- -
-
-
    def __init__(self, project_name: str, api_token: Optional[str] = None, param_name_of_parent_exp_id: str = None):
-        super().__init__(project_name, api_token)
-        self.param_name_of_parent_exp_id = param_name_of_parent_exp_id
-
-
-
-
-
-
- # -
- -
-
-
    def download(self,
-                 id: Optional[Union[str, List[str]]] = None,
-                 state: Optional[Union[str, List[str]]] = None,
-                 owner: Optional[Union[str, List[str]]] = None,
-                 tag: Optional[Union[str, List[str]]] = None,
-                 attributes: Optional[List[str]] = None,
-                 short_names: bool = True,
-                 series: List[str] = [],
-                 attributes_parent: List[str] = [],
-                 series_parent: List[str] = []) -> Experiment:
-        current_experiment = super().download(id, state, owner, tag, attributes, short_names, series)
-        parent_experiment = self._download_parent_experiment(current_experiment, attributes_parent, series_parent)
-
-
-
-
-
-
- # -
-

Collapse params and metrics of parent expeirment into current_experiment, ‘parent’ namespace

-
-
-
        parent_experiment.params = parent_experiment.params.add_prefix('parent/')
-        parent_experiment.series = {k: v.add_prefix('parent/') for k, v in parent_experiment.series.items()}
-
-        return Experiment(
-            pd.concat([current_experiment.params, parent_experiment.params], axis=1), {
-                **current_experiment.series,
-                **parent_experiment.series
-            })
-
-
-
-
-
-
- # -
- -
-
-
    def _download_parent_experiment(self, current_experiment, attributes_parent: List[str], series_parent: List[str]):
-        parent_experiment_id = current_experiment.params[self.param_name_of_parent_exp_id].values[0]
-        return super().download(id=parent_experiment_id, attributes=attributes_parent, series=series_parent)
-
-
-
-
-
-
- diff --git a/docs/noise.html b/docs/noise.html deleted file mode 100644 index d166619..0000000 --- a/docs/noise.html +++ /dev/null @@ -1,59 +0,0 @@ - - - - - noise.py - - - -
-
-
-

noise.py

-
-
-
-
-
- # -
- -
-
-
import os
-import contextlib
-
-
-
-
-
-
- # -
- -
-
-
class Suppressor:
-
-
-
-
-
-
- # -
- -
-
-
    @classmethod
-    def exec_no_stdout(cls, fn, **kwargs):
-        with open(os.devnull, 'w') as devnull:
-            with contextlib.redirect_stdout(devnull):
-                return fn(**kwargs)
-
-
-
-
-
-
- diff --git a/docs/paths.html b/docs/paths.html deleted file mode 100644 index f9145cc..0000000 --- a/docs/paths.html +++ /dev/null @@ -1,95 +0,0 @@ - - - - - paths.py - - - -
-
-
-

paths.py

-
-
-
-
-
- # -
-

Find the longest common suffix between the current_path and all other paths in all_paths.

-

Args: - all_paths (List[str]): A list of all path strings. - current_path (str): The path string for which the longest common suffix is calculated.

-

Returns: - int: The length of the longest common suffix.

-
-
-
def find_longest_common_suffix(all_paths, current_path):
-
-
-
-
-
-
- # -
- -
-
-
    max_common_suffix_length = 1
-    paths = [path for path in all_paths if path != current_path]
-    for i in range(1, len(current_path.split('/'))):
-        if any('/'.join((path.split('/')[-i:])) == '/'.join(current_path.split('/')[-i:])
-               for path in paths
-               if len(path.split('/')) >= i):
-            max_common_suffix_length += 1
-        else:
-            break
-
-    return max_common_suffix_length
-
-
-
-
-
-
- # -
-

Shorten the input paths by finding the longest common suffix for each path and keeping only the required parts to distinguish between them. Also, return a dictionary mapping the original paths to their shortened versions.

-

Args: - paths (List[str]): A list of input path strings.

-

Returns: - Dict[str, str]: A dictionary mapping the original paths to their shortened versions.

-
-
-
def shorten_paths(paths):
-
-
-
-
-
-
- # -
- -
-
-
    shortened_paths = []
-    path_mapping = {}
-
-    for path in paths:
-        split_path = path.split('/')
-        max_common_suffix_length = find_longest_common_suffix(paths, path)
-        shortened_path = '/'.join(split_path[-max_common_suffix_length:])
-        shortened_paths.append(shortened_path)
-        path_mapping[path] = shortened_path
-
-    return path_mapping
-
-
-
-
-
-
- diff --git a/docs/paths_test.html b/docs/paths_test.html deleted file mode 100644 index 2d21f78..0000000 --- a/docs/paths_test.html +++ /dev/null @@ -1,80 +0,0 @@ - - - - - paths_test.py - - - -
-
-
-

paths_test.py

-
-
-
-
-
- # -
- -
-
-
import pytest
-from exphub.utils.paths import shorten_paths
-
-
-
-
-
-
- # -
- -
-
-
@pytest.mark.parametrize(
-    "paths,expected",
-    [
-        (
-            ["/a/b/c/d", "/a/b/c/e"],
-            {
-                "/a/b/c/d": "d",
-                "/a/b/c/e": "e"
-            },
-        ),
-        (
-            ["/a/b/c/d", "/a/b/c/e", "/a/b/c/f"],
-            {
-                "/a/b/c/d": "d",
-                "/a/b/c/e": "e",
-                "/a/b/c/f": "f"
-            },
-        ),
-        (
-            ["/a/b/c/d/e", "/a/b/c/e"],
-            {
-                "/a/b/c/d/e": "d/e",
-                "/a/b/c/e": "c/e"
-            },
-        ),
-        (
-            ["/a/b/c/d/e", "/a/b/c/d/f", "/a/b/c/d/g"],
-            {
-                "/a/b/c/d/e": "e",
-                "/a/b/c/d/f": "f",
-                "/a/b/c/d/g": "g"
-            },
-        ),
-    ],
-)
-def test_shorten_paths(paths, expected):
-    result = shorten_paths(paths)
-    assert result == expected
-
-
-
-
-
-
- diff --git a/docs/pycco.css b/docs/pycco.css deleted file mode 100644 index aef571a..0000000 --- a/docs/pycco.css +++ /dev/null @@ -1,190 +0,0 @@ -/*--------------------- Layout and Typography ----------------------------*/ -body { - font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; - font-size: 16px; - line-height: 24px; - color: #252519; - margin: 0; padding: 0; - background: #f5f5ff; -} -a { - color: #261a3b; -} - a:visited { - color: #261a3b; - } -p { - margin: 0 0 15px 0; -} -h1, h2, h3, h4, h5, h6 { - margin: 40px 0 15px 0; -} -h2, h3, h4, h5, h6 { - margin-top: 0; - } -#container { - background: white; - } -#container, div.section { - position: relative; -} -#background { - position: absolute; - top: 0; left: 580px; right: 0; bottom: 0; - background: #f5f5ff; - border-left: 1px solid #e5e5ee; - z-index: 0; -} -#jump_to, #jump_page { - background: white; - -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; - -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; - font: 10px Arial; - text-transform: uppercase; - cursor: pointer; - text-align: right; -} -#jump_to, #jump_wrapper { - position: fixed; - right: 0; top: 0; - padding: 5px 10px; -} - #jump_wrapper { - padding: 0; - display: none; - } - #jump_to:hover #jump_wrapper { - display: block; - } - #jump_page { - padding: 5px 0 3px; - margin: 0 0 25px 25px; - } - #jump_page .source { - display: block; - padding: 5px 10px; - text-decoration: none; - border-top: 1px solid #eee; - } - #jump_page .source:hover { - background: #f5f5ff; - } - #jump_page .source:first-child { - } -div.docs { - float: left; - max-width: 500px; - min-width: 500px; - min-height: 5px; - padding: 10px 25px 1px 50px; - vertical-align: top; - text-align: left; -} - .docs pre { - margin: 15px 0 15px; - padding-left: 15px; - } - .docs p tt, .docs p code { - background: #f8f8ff; - border: 1px solid #dedede; - font-size: 12px; - padding: 0 0.2em; - } - .octowrap { - position: relative; - } - .octothorpe { - font: 12px Arial; - text-decoration: none; - color: #454545; - position: absolute; - top: 3px; left: -20px; - padding: 1px 2px; - opacity: 0; - -webkit-transition: opacity 0.2s linear; - } - div.docs:hover .octothorpe { - opacity: 1; - } -div.code { - margin-left: 580px; - padding: 14px 15px 16px 50px; - vertical-align: top; -} - .code pre, .docs p code { - font-size: 12px; - } - pre, tt, code { - line-height: 18px; - font-family: Monaco, Consolas, "Lucida Console", monospace; - margin: 0; padding: 0; - } -div.clearall { - clear: both; -} - - -/*---------------------- Syntax Highlighting -----------------------------*/ -td.linenos { background-color: #f0f0f0; padding-right: 10px; } -span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } -body .hll { background-color: #ffffcc } -body .c { color: #408080; font-style: italic } /* Comment */ -body .err { border: 1px solid #FF0000 } /* Error */ -body .k { color: #954121 } /* Keyword */ -body .o { color: #666666 } /* Operator */ -body .cm { color: #408080; font-style: italic } /* Comment.Multiline */ -body .cp { color: #BC7A00 } /* Comment.Preproc */ -body .c1 { color: #408080; font-style: italic } /* Comment.Single */ -body .cs { color: #408080; font-style: italic } /* Comment.Special */ -body .gd { color: #A00000 } /* Generic.Deleted */ -body .ge { font-style: italic } /* Generic.Emph */ -body .gr { color: #FF0000 } /* Generic.Error */ -body .gh { color: #000080; font-weight: bold } /* Generic.Heading */ -body .gi { color: #00A000 } /* Generic.Inserted */ -body .go { color: #808080 } /* Generic.Output */ -body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ -body .gs { font-weight: bold } /* Generic.Strong */ -body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ -body .gt { color: #0040D0 } /* Generic.Traceback */ -body .kc { color: #954121 } /* Keyword.Constant */ -body .kd { color: #954121; font-weight: bold } /* Keyword.Declaration */ -body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */ -body .kp { color: #954121 } /* Keyword.Pseudo */ -body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */ -body .kt { color: #B00040 } /* Keyword.Type */ -body .m { color: #666666 } /* Literal.Number */ -body .s { color: #219161 } /* Literal.String */ -body .na { color: #7D9029 } /* Name.Attribute */ -body .nb { color: #954121 } /* Name.Builtin */ -body .nc { color: #0000FF; font-weight: bold } /* Name.Class */ -body .no { color: #880000 } /* Name.Constant */ -body .nd { color: #AA22FF } /* Name.Decorator */ -body .ni { color: #999999; font-weight: bold } /* Name.Entity */ -body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ -body .nf { color: #0000FF } /* Name.Function */ -body .nl { color: #A0A000 } /* Name.Label */ -body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ -body .nt { color: #954121; font-weight: bold } /* Name.Tag */ -body .nv { color: #19469D } /* Name.Variable */ -body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ -body .w { color: #bbbbbb } /* Text.Whitespace */ -body .mf { color: #666666 } /* Literal.Number.Float */ -body .mh { color: #666666 } /* Literal.Number.Hex */ -body .mi { color: #666666 } /* Literal.Number.Integer */ -body .mo { color: #666666 } /* Literal.Number.Oct */ -body .sb { color: #219161 } /* Literal.String.Backtick */ -body .sc { color: #219161 } /* Literal.String.Char */ -body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */ -body .s2 { color: #219161 } /* Literal.String.Double */ -body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ -body .sh { color: #219161 } /* Literal.String.Heredoc */ -body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ -body .sx { color: #954121 } /* Literal.String.Other */ -body .sr { color: #BB6688 } /* Literal.String.Regex */ -body .s1 { color: #219161 } /* Literal.String.Single */ -body .ss { color: #19469D } /* Literal.String.Symbol */ -body .bp { color: #954121 } /* Name.Builtin.Pseudo */ -body .vc { color: #19469D } /* Name.Variable.Class */ -body .vg { color: #19469D } /* Name.Variable.Global */ -body .vi { color: #19469D } /* Name.Variable.Instance */ -body .il { color: #666666 } /* Literal.Number.Integer.Long */ diff --git a/docs/wizzards.html b/docs/wizzards.html deleted file mode 100644 index e3e2650..0000000 --- a/docs/wizzards.html +++ /dev/null @@ -1,319 +0,0 @@ - - - - - wizzards.py - - - -
-
-
-

wizzards.py

-
-
-
-
-
- # -
- -
-
-
from abc import ABC, abstractmethod
-
-import pandas as pd
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-
-from exphub.download.experiment import Experiment
-
-
-
-
-
-
- # -
-

An abstract base class for creating wizards to render various visualizations.

-
-
-
class Wizard(ABC):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
-

Renders the visualization. This method should be implemented by subclasses.

-

:param kwargs: Optional keyword arguments specific to each subclass.

-
-
-
    @abstractmethod
-    def render(self, **kwargs):
-
-
-
-
-
-
- # -
- -
-
-
        pass
-
-
-
-
-
-
- # -
-

A wizard class for rendering tables with custom styling. This class is used for visualizing -experiment data by applying different background colors to attributes and series in the table.

-
-
-
class TableWizard(Wizard):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
-

Initializes the TableWizard with the given experiment.

-

:param experiment: The experiment object containing data to be visualized.

-
-
-
    def __init__(self, experiment: Experiment):
-
-
-
-
-
-
- # -
- -
-
-
        self.experiment = experiment
-
-
-
-
-
-
- # -
-

Renders the table with the specified background colors for attributes and series.

-

:param attributes_color: The background color to be applied to attribute names. Default is ‘#211b1b’. -:param series_color: The background color to be applied to series names. Default is ‘#022b11’. -:return: The styled table with the specified background colors applied.

-
-
-
    def render(self, attributes_color: str = '#211b1b', series_color: str = '#022b11'):
-
-
-
-
-
-
- # -
- -
-
-
        return self.experiment.params.style.set_properties(
-            **{
-                'background-color': attributes_color
-            }, subset=self.experiment.attributes_names).set_properties(
-                **{'background-color': series_color}, subset=self.experiment.series_names)
-
-
-
-
-
-
- # -
-

A wizard class for rendering line plots of time series data from experiments. This class is used -for visualizing experiment data by plotting single or multiple series on a line chart or subplots.

-
-
-
class SeriesWizard(Wizard):
-
-
-
-
-
-
- # -
- -
-
-
-
-
-
-
-
-
- # -
-

Initializes the SeriesWizard with the given experiment.

-

:param experiment: The experiment object containing data to be visualized.

-
-
-
    def __init__(self, experiment: Experiment):
-
-
-
-
-
-
- # -
- -
-
-
        self.experiment = experiment
-
-
-
-
-
-
- # -
-

Renders the line plot visualization for single or multiple series.

-

:return: A plotly figure containing the line plot(s) of the time series data.

-
-
-
    def render(self):
-
-
-
-
-
-
- # -
- -
-
-
        if len(self.experiment.series_names) == 0:
-            raise ValueError('No series to plot.')
-
-        if len(self.experiment.series_names) > 1:
-            return self._render_multiple_series()
-
-        return self._render_single_series()
-
-
-
-
-
-
- # -
- -
-
-
    def _render_single_series(self):
-        fig = go.Figure()
-        metric_name = self.experiment.series_names[0]
-
-        xaxis_title = 'step'
-        yaxis_title = metric_name
-        title = f'{yaxis_title} per {xaxis_title}'
-
-        traces = self._generate_traces(self.experiment.series[self.experiment.series_names[0]])
-
-        fig.add_traces(traces)
-        fig.update_layout(title=title, xaxis_title=xaxis_title, yaxis_title=metric_name, legend_title='runs')
-
-        return fig
-
-
-
-
-
-
- # -
- -
-
-
    def _generate_traces(self, series: pd.DataFrame):
-        traces = []
-        for run_id in series.columns:
-            traces.append(go.Scatter(x=series.index, y=series[run_id], name=run_id))
-        return traces
-
-
-
-
-
-
- # -
- -
-
-
    def _render_multiple_series(self):
-        n_plots = len(self.experiment.series_names)
-        xaxis_title = 'step'
-
-        fig = make_subplots(
-            rows=1,
-            cols=n_plots,
-            shared_xaxes=False,
-            shared_yaxes=True,
-            subplot_titles=[f'{metric_name} per step' for metric_name in self.experiment.series_names])
-        grid = [(x, 1) for x in range(1, n_plots + 1)]
-
-        for (x, y), series_name in zip(grid, self.experiment.series_names):
-            traces = self._generate_traces(self.experiment.series[series_name])
-            fig.add_traces(traces, rows=y, cols=x)
-            fig.update_yaxes(title_text=series_name, row=y, col=x)
-            fig.update_xaxes(title_text=xaxis_title, row=y, col=x)
-
-        return fig
-
-
-
-
-
-
- diff --git a/exphub/download/neptune_downloader.py b/exphub/download/neptune_downloader.py index 9c9a813..9c446e4 100644 --- a/exphub/download/neptune_downloader.py +++ b/exphub/download/neptune_downloader.py @@ -66,15 +66,15 @@ def download(self, raise ValueError('At least one of id, state, owner, or tag must be provided.') columns = [*attributes, *series] - hsh = tag if tag is not None else 'x' - logger.info(f'Hash of columns: {hsh}') - logger.info(f'Cache path: {os.path.join(NeptuneDownloader.EXPHUB_CACHE, f"{hsh}.joblib")}') - if (not force) and os.path.exists(os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')): - logger.info(f'Loading experiment from cache {hsh}.joblib') - logger.warning(f'Loading experiment from cache {hsh}.joblib is based on the columns.\nIf you have new runs with the same columns, please override the cache by using the override_cache argument=True') - return jl.load(os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')) - else: - logger.info(f'No cache found. Downloading experiment from Neptune.ai') + # hsh = tag if tag is not None else 'x' + # logger.info(f'Hash of columns: {hsh}') + # logger.info(f'Cache path: {os.path.join(NeptuneDownloader.EXPHUB_CACHE, f"{hsh}.joblib")}') + # if (not force) and os.path.exists(os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')): + # logger.info(f'Loading experiment from cache {hsh}.joblib') + # logger.warning(f'Loading experiment from cache {hsh}.joblib is based on the columns.\nIf you have new runs with the same columns, please override the cache by using the override_cache argument=True') + # return jl.load(os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')) + # else: + # logger.info(f'No cache found. Downloading experiment from Neptune.ai') params = self.project.fetch_runs_table(owner=owner, id=id, state=state, tag=tag, columns=columns).to_pandas() series_dict = {} for series_col in series: @@ -100,8 +100,8 @@ def download(self, logger.info(f'dtypes of params: {exp.params.dtypes}') # Cache experiment - jl.dump(exp, os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')) - logger.info(f'Experiment cached at {os.path.join(NeptuneDownloader.EXPHUB_CACHE, f"{hsh}.joblib")}') + # jl.dump(exp, os.path.join(NeptuneDownloader.EXPHUB_CACHE, f'{hsh}.joblib')) + # logger.info(f'Experiment cached at {os.path.join(NeptuneDownloader.EXPHUB_CACHE, f"{hsh}.joblib")}') return exp diff --git a/exphub/relaunchers/__init__.py b/exphub/relaunchers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/exphub/relaunchers/validator.py b/exphub/relaunchers/validator.py deleted file mode 100644 index 42ef084..0000000 --- a/exphub/relaunchers/validator.py +++ /dev/null @@ -1,141 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import List -from loguru import logger - -from exphub.download.experiment import Experiment - -class RelaunchTrigger(ABC): - """Abstract base class for relaunch triggers. - - A relaunch trigger is used to determine whether an experiment should be relaunched based on certain conditions. - """ - def filter(self, experiment: Experiment) -> bool: - """Filter experiments based on the conditions defined in the derived class. - - Args: - experiment (Experiment): The experiment to be filtered. - - Returns: - bool: True if the experiment meets the conditions, False otherwise. - """ - return experiment.filter_via_hyperparams(self.conditions) - - @property - @abstractmethod - def conditions(self) -> List[str]: - """Abstract property that should be implemented in derived classes. - - Returns: - List[str]: A list of conditions that the experiment must meet to be relaunched. - """ - raise NotImplementedError - -class FailedStatus(RelaunchTrigger): - """Relaunch trigger for experiments that have failed.""" - @property - def conditions(self) -> List[str]: - """Conditions for relaunching experiments that have failed. - - Returns: - List[str]: A list of conditions that the experiment must meet to be relaunched. - """ - return [ - lambda df: df['failed'] == 'True' - ] - -class NotEnoughEpochs(RelaunchTrigger): - """Relaunch trigger for experiments that have not reached a minimum number of epochs.""" - def __init__(self, min_epochs: int) -> None: - """Initialize the trigger with a minimum number of epochs. - - Args: - min_epochs (int): The minimum number of epochs required for an experiment. - """ - self.min_epochs = min_epochs - @property - def conditions(self) -> List[str]: - """Conditions for relaunching experiments that have not reached a minimum number of epochs. - - Returns: - List[str]: A list of conditions that the experiment must meet to be relaunched. - """ - return [ - lambda df: df['epoch'] < self.min_epochs - ] - -def _load_default_triggers() -> List[RelaunchTrigger]: - """Load the default relaunch triggers. - - Returns: - List[RelaunchTrigger]: A list of default relaunch triggers. - """ - return [ - FailedStatus(), - NotEnoughEpochs(5) - ] - -@dataclass -class ValidationResult: - """Data class for storing the results of experiment validation.""" - ids_success: List[str] # all triggers passed - ids_failed: List[str] # at least one trigger failed - - def summary(self): - """Print a summary of the validation results.""" - logger.info(f'Validation summary:') - logger.info(f'* {len(self.ids_success)} runs passed all triggers') - logger.info(f'* {len(self.ids_failed)} runs failed at least one trigger') - logger.info(f'* {len(self.ids_success) + len(self.ids_failed)} runs total') - - if len(self.ids_failed) == 0: - logger.success(f'* {len(self.ids_success) / (len(self.ids_success) + len(self.ids_failed)) * 100:.2f}% success rate') - logger.success(f'* {len(self.ids_failed) / (len(self.ids_success) + len(self.ids_failed)) * 100:.2f}% failure rate') - else: - logger.critical(f'* {len(self.ids_success) / (len(self.ids_success) + len(self.ids_failed)) * 100:.2f}% success rate') - logger.critical(f'* {len(self.ids_failed) / (len(self.ids_success) + len(self.ids_failed)) * 100:.2f}% failure rate') - -class Validator: - """Validator class. - - This class is used to relaunch experiments that have crashed. - """ - def __init__(self, relaunch_triggers: List[RelaunchTrigger] = _load_default_triggers()) -> None: - """Initialize the validator with a list of relaunch triggers. - - Args: - relaunch_triggers (List[RelaunchTrigger], optional): A list of relaunch triggers. Defaults to _load_default_triggers(). - """ - self.relaunch_triggers = relaunch_triggers - - def validate(self, experiment): - """Validate an experiment based on the relaunch triggers. - - Args: - experiment (Experiment): The experiment to be validated. - - Returns: - ValidationResult: The results of the validation. - """ - logger.info(f'Validating experiment') - experiment_ids = experiment.params[experiment.id_column_name].unique() - - trigger2experiment = { - type(trigger).__name__: trigger.filter(experiment) for trigger in self.relaunch_triggers - } - all_failed_ids = [] - - for trigger_name, trigger_result in trigger2experiment.items(): - if len(trigger_result) == 0: - logger.success(f'Experiment is empty after applying following trigger: {trigger_name}') - continue - ids_failed = trigger_result.params[experiment.id_column_name].unique() - for id in ids_failed: - logger.error(f'Run {id} failed trigger {trigger_name}') - - all_failed_ids.extend(ids_failed) - - all_failed_ids = set(all_failed_ids) - ids_success = [id for id in experiment_ids if id not in all_failed_ids] - - return ValidationResult(ids_success, ids_failed) diff --git a/poetry.lock b/poetry.lock index eb58513..4d1bad7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -747,13 +747,13 @@ files = [ [[package]] name = "neptune" -version = "1.5.0" +version = "1.6.0" description = "Neptune Client" optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "neptune-1.5.0-py3-none-any.whl", hash = "sha256:e0d150febcaa2fa516c17cdb37066bcbfd1528c1898b0cf638e6d6742b8f2ff5"}, - {file = "neptune-1.5.0.tar.gz", hash = "sha256:677c14c242609e76de9577d2c9e9f6aa41a8bdc473a01db55ef34e3c154a9d2a"}, + {file = "neptune-1.6.0-py3-none-any.whl", hash = "sha256:355bbb0a3e353e4c66f39c2c5bf9b0bc547ceba583d38c6c76618e4acde1450f"}, + {file = "neptune-1.6.0.tar.gz", hash = "sha256:b3ca24e81bd0ebdbc27a93a7274263570f834a97f808fe06ba9e863c098a24a9"}, ] [package.dependencies] @@ -1608,4 +1608,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "3.10.10" -content-hash = "666a031051c953e64e998c4c54acf7b12720a3484af99013d0633452bdd4e423" +content-hash = "013e2ae377cd3f3aa98619f0bc1ee3cd9f711873c465b2c908396d3040d86499" diff --git a/pyproject.toml b/pyproject.toml index 7aa4e9f..caf8dbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ loguru = "^0.7.0" moto = "*" boto3 = "*" urllib3 = "<2" -neptune = "1.5.0" +neptune = "^1.6.0" [build-system] requires = ["poetry-core"]