Merge pull request #80 from giotto-ai/refactor_before_master

Refactor before master
giotto-ai · Dec 18, 2019 · aa45e7e · aa45e7e
2 parents 7f0383b + e4cc41f
commit aa45e7e
Show file tree

Hide file tree

Showing 48 changed files with 1,227 additions and 962 deletions.
diff --git a/doc/conf.py b/doc/conf.py
@@ -19,7 +19,6 @@
 
 project = "giotto-time"
 copyright = "2019, L2F"
-author = "Benjamin Russell, Stefano Savarè, Alessio Baccelli"
 
 # The full version, including alpha/beta/rc tags
 from giottotime import __version__

diff --git a/doc/images/gar.png b/doc/images/gar.png
diff --git a/doc/images/no_trend.png b/doc/images/no_trend.png
diff --git a/doc/images/trend.png b/doc/images/trend.png
diff --git a/doc/images/trimmer.png b/doc/images/trimmer.png
diff --git a/doc/reference/feature_creation.rst b/doc/reference/feature_creation.rst
@@ -12,7 +12,7 @@
    :toctree: generated/
    :template: class.rst
 
-   feature_creation.FeaturesCreation
+   feature_creation.FeatureCreation
    feature_creation.ShiftFeature
    feature_creation.MovingAverageFeature
    feature_creation.ConstantFeature
@@ -43,4 +43,3 @@
    feature_creation.tda_features.AvgLifeTimeFeature
    feature_creation.tda_features.BettiCurvesFeature
    feature_creation.tda_features.NumberOfRelevantHolesFeature
-
diff --git a/doc/reference/index.rst b/doc/reference/index.rst
@@ -7,7 +7,7 @@ API Reference
 ======================================
 
 .. toctree::
-    :maxdepth: 3
+    :maxdepth: 2
     :hidden:
 
     causality_tests

diff --git a/doc/release_notes/index.rst b/doc/release_notes/index.rst
@@ -11,14 +11,14 @@ Overview
 compared to traditional time series libraries are the following:
 
 - feature creation, model selection, model assessment and prediction pipeline for time series models.
-- plug-and-play availability of any scikit-learn-compatible regression or classification model for forecasting.
-- minimization of standard custom loss functions for time series (SMAPE, max error, etc..)
-- easy-to-use scikit-learn-familiar API.
+- plug-and-play availability of any scikit-learn-compatible (i.e., in the fit-transform framework) regression or classification models for forecasting.
+- minimization of standard and custom loss functions for time series (SMAPE, max error, etc..).
+- easy-to-use scikit-learn-familiar and pandas-familiar API.
 
-Additionally we provide standard causality tests with a scikit-learn-like interface.
+Additionally, we provide causality tests with a scikit-learn-like transformer interface.
 
 
-Input-Output specifications
+Input-Output Specifications
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 **Input:** `pd.Series`, `pd.DataFrame` (single column), `np.array`, `list`
@@ -28,31 +28,6 @@ Input-Output specifications
 **Additional input parameters:** the user can pass a list of features and a scikit-learn
 compatible model to giotto-time.
 
-Example of Usage
-~~~~~~~~~~~~~~~~
-
-.. code-block:: python
-
-    from giottotime.feature_creation import FeaturesCreation
-    from giottotime.feature_creation.index_independent_features import ShiftFeature, MovingAverageFeature
-    from giottotime.model_selection.train_test_splitter import TrainTestSplitter
-    from giottotime.regressors import LinearRegressor
-    from giottotime.models.time_series_models import GAR
-
-    time_series = get_time_series()
-
-    features_creation = FeaturesCreation(
-        horizon=4,
-        features = [ShiftFeature(1), ShiftFeature(2), MovingAverageFeature(5)]
-    )
-    train_test_splitter = TrainTestSplitter()
-    time_series_model = GAR(base_model=LinearRegressor())
-
-    X, y = features_creation.transform(time_series)
-    X_train, y_train, X_test, y_test = train_test_splitter.transform(X, y)
-
-    time_series_model.fit(X_train, y_train)
-    predictions = time_series_model.predict(X_test)
 
 Time Series Preparation
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -79,28 +54,108 @@ We support the following features:
 - `ExogenousFeature`
 - `CustomFeature`
 
-The features have a scikit-learn-like interface.
+These features all have a scikit-learn-like interface and behave as transformers.
 
 The class FeatureCreation wraps a list of features together and returns the X and y
 matrices from a time series given as input.
 
 Time Series Model
 ~~~~~~~~~~~~~~~~~
-We provide the `GAR` class (Generalize Auto Regressive).
+Giotto-time provides the `GAR` class (Generalized Auto Regressive model).
 It operates in a similar way to the standard AR, but with an arbitrary number of
-features and with an arbitrary regression model.
+features and with an arbitrary underlying regression model.
+
+.. image:: ../../../../images/gar.png
+    :width: 60%
+    :align: center
+
+.. code-block:: python
+
+    from giottotime.feature_creation import FeatureCreation
+    from giottotime.feature_creation.index_independent_features import ShiftFeature, MovingAverageFeature
+    from giottotime.model_selection.train_test_splitter import TrainTestSplitter
+    from giottotime.regressors import LinearRegressor
+    from giottotime.models.time_series_models import GAR
+
+    time_series = get_time_series()
+
+    features_creation = FeatureCreation(
+        horizon=4,
+        features = [ShiftFeature(1), ShiftFeature(2), MovingAverageFeature(5)]
+    )
+    train_test_splitter = TrainTestSplitter()
+    time_series_model = GAR(base_model=LinearRegressor())
+
+    X, y = features_creation.transform(time_series)
+    X_train, y_train, X_test, y_test = train_test_splitter.transform(X, y)
+
+    time_series_model.fit(X_train, y_train)
+    predictions = time_series_model.predict(X_test)
 
 Time Series Trend Model
 ~~~~~~~~~~~~~~~~~~~~~~~
-We provide three main classes to analyze and remove trends from time series:
-- `FunctionTrend`
-- `ExponentialTrend`
-- `PolynomialTrend`
+We provide main classes to analyze and remove trends from time series in order to create trend stationary time series.
+
+Specifically, giotto-time includes `ExponentialTrend`, `PolynomialTrend` model classes and de-trending transformers.
+
+Example of Usage
+~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    import matplotlib.pyplot as plt
+
+    from giottotime.models.regressors.linear_regressor import LinearRegressor
+    from giottotime.loss_functions.loss_functions import max_error, smape
+
+    from giottotime.models.trend_models.polynomial_trend import PolynomialTrend
+
+    from math import pi
+
+    d = pd.read_csv('trend.csv', index_col=0, parse_dates=True)
+    tm = PolynomialTrend(order=3)
+
+    tm.fit(d)
+
+    d.plot(figsize=(10, 10))
+    plt.show()
+
+    detrended = tm.transform(d)
+
+    detrended.plot(figsize=(10, 10))
+    plt.show()
+
+Before applying the detrending transformer, a clear quadratic trend is present in the data:
+
+.. image:: ../../../../images/trend.png
+    :width: 60%
+    :align: center
+
+After fitting and applying the detrending transformer, the transformed data is 'trend stationary':
+
+.. image:: ../../../../images/no_trend.png
+    :width: 60%
+    :align: center
+
+For additional information on trend stationarity, see:
+Trend stationarity: `Wikipedia - Trend stationarity <https://en.wikipedia.org/wiki/Trend_stationary>`_.
+
 
 Model Selection and Cross Validation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- `trim_feature_nans`
+
+.. image:: ../../../../images/trimmer.png
+    :width: 60%
+    :align: center
+
 - `TrainTestSplitter`
 
+
+
 Custom Regressors
 ~~~~~~~~~~~~~~~~~
 
@@ -110,6 +165,18 @@ Causality Tests
 ~~~~~~~~~~~~~~~
 We provide two tests: `ShiftedLinearCoefficient` and `ShiftedPearsonCorrelation`.
 
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+
+    import matplotlib.pyplot as plt
+
+    from giottotime.causality_tests import ShiftedPearsonCorrelation
+
+    #TODO
+
+
 Release 0.2.0 (to be discussed)
 -------------------------------
 To be discussed.
diff --git a/giottotime/base/constants.py b/giottotime/base/constants.py
diff --git a/giottotime/causality_tests/__init__.py b/giottotime/causality_tests/__init__.py
@@ -1,2 +1,10 @@
+from .base import CausalityTest
 from .shifted_linear_coefficient import ShiftedLinearCoefficient
 from .shifted_pearson_correlation import ShiftedPearsonCorrelation
+
+
+__all__ = [
+    "CausalityTest",
+    "ShiftedLinearCoefficient",
+    "ShiftedPearsonCorrelation",
+]
diff --git a/giottotime/causality_tests/shifted_linear_coefficient.py b/giottotime/causality_tests/shifted_linear_coefficient.py
@@ -3,9 +3,9 @@
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression
+from sklearn.utils.validation import check_is_fitted
 
 from giottotime.causality_tests.base import CausalityTest
-from giottotime.models.utils import check_is_fitted
 
 
 class ShiftedLinearCoefficient(CausalityTest):
@@ -14,33 +14,34 @@ class ShiftedLinearCoefficient(CausalityTest):
 
     Parameters
     ----------
-    max_shift : ``int``, optional, (default=``10``).
+    max_shift : int, optional, default: ``10``
+        The maximum number of shifts to check for.
 
-    target_col : ``str``, optional, (default='y').
-            The column to use as the a reference (i.e., the columns which is not shifted).
+    target_col : str, optional, default: ``'y'``
+            The column to use as the reference (i.e., the column which is not
+            shifted).
 
-    dropna : ``bool``, optional, (default=False).
+    dropna : bool, optional, default: ``False``
         Determines if the Nan values created by shifting are retained or dropped.
 
     """
 
     def __init__(
         self, max_shift: int = 10, target_col: str = "y", dropna: bool = False
     ):
-        self._max_shift = max_shift
-        self._target_col = target_col
-        self._dropna = dropna
+        self.max_shift = max_shift
+        self.target_col = target_col
+        self.dropna = dropna
 
     def fit(self, data: pd.DataFrame) -> "ShiftedLinearCoefficient":
-        """Create the dataframe of shifts of each time series which maximize
-        the shifted linear fit coefficients.
+        """Create the dataframe of shifts of each time series which maximize the shifted
+         linear fit coefficients.
 
         Parameters
         ----------
-        data : ``pd.DataFrame``, required.
-            The time-series on which to compute the shifted linear fit coefficients.
-
-        max_shift : ``int``, optional, (default=10).
+        data : pd.DataFrame, shape (n_samples, n_time_series), required.
+            The DataFrame containing the time-series on which to compute the shifted
+            linear fit coefficients.
 
         Returns
         -------
@@ -53,7 +54,7 @@ def fit(self, data: pd.DataFrame) -> "ShiftedLinearCoefficient":
         )
 
         for x, y in product(data.columns, repeat=2):
-            res = self._get_max_coeff_shift(data, self._max_shift, x=x, y=y)
+            res = self._get_max_coeff_shift(data, self.max_shift, x=x, y=y)
 
             best_shift = res[1]
             max_corr = res[0]
@@ -77,34 +78,33 @@ def fit(self, data: pd.DataFrame) -> "ShiftedLinearCoefficient":
         return self
 
     def transform(self, data: pd.DataFrame) -> pd.DataFrame:
-        """Shifts each input timeseries but the amount which maximizes
-        shifted linear fit coefficients with the selected 'y' colums.
+        """Shifts each input timeseries by the amount which maximizes shifted linear
+        fit coefficients with the selected 'y' columns.
 
         Parameters
         ----------
-        data : ``pd.DataFrame``, required.
-            The time-series on which to perform the transformation.
+        data : pd.DataFrame, shape (n_samples, n_time_series), required.
+            The DataFrame containing the time series on which to perform the
+            transformation.
 
         Returns
         -------
-        shifted_data : ``pd.DataFrame``
-            The dataframe (Pivot table) of the shifts which maximize the shifted linear
+        data_t : pd.DataFrame, shape (n_samples, n_time_series)
+            The DataFrame (Pivot table) of the shifts which maximize the shifted linear
             fit coefficients between each timeseries. The shift is indicated in rows.
 
         """
-        check_is_fitted(self)
-        shifted_data = data.copy()
+        check_is_fitted(self, ["best_shifts_", "max_corrs_"])
+        data_t = data.copy()
 
-        for col in shifted_data:
-            if col != self._target_col:
-                shifted_data[col] = shifted_data[col].shift(
-                    self.best_shifts_[col][self._target_col]
-                )
+        for col in data_t:
+            if col != self.target_col:
+                data_t[col] = data_t[col].shift(self.best_shifts_[col][self.target_col])
 
-        if self._dropna:
-            shifted_data = shifted_data.dropna()
+        if self.dropna:
+            data_t = data_t.dropna()
 
-        return shifted_data
+        return data_t
 
     def _get_max_coeff_shift(
         self, data: pd.DataFrame, max_shift: int, x: str = "x", y: str = "y"