diff --git a/.gitignore b/.gitignore index 632e72b..0ea875b 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ target/ # Jupyter NB Checkpoints .ipynb_checkpoints/ +Untitled* # exclude data from source control by default /data/ diff --git a/README.rst b/README.rst index 4ccf67e..37b78c3 100644 --- a/README.rst +++ b/README.rst @@ -34,7 +34,7 @@ Giotto-time provide the GAR class (Generalize Auto Regressive model). It operate .. raw:: html

- +

This model allows the full force of machine learning regressors (compatible with the fit-transform framework ok scikit-learn) to be combined with advanced feature creation stratagies to forecast time series in a convienent api. @@ -67,28 +67,26 @@ Time Series Preparation To transform an input array-like structure into a DataFrame with a PeriodIndex we provide the classes: -To transform an input array-like structure into a DataFrame with a PeriodIndex we provide the classes: - * TimeSeriesPreparation * TimeSeriesConversion * SequenceToTimeIndexSeries * PandasSeriesToTimeIndexSeries * TimeIndexSeriesToPeriodIndexSeries - Feature Creation ================ The following time series features are currently supported: -CalendarFeature -PeriodicSeasonalFeature -ShiftFeature -MovingAverageFeature -ConstantFeature -PolynomialFeature -ExogenousFeature -CustomFeature +* CalendarFeature +* PeriodicSeasonalFeature +* ShiftFeature +* MovingAverageFeature +* ConstantFeature +* PolynomialFeature +* ExogenousFeature +* CustomFeature + These features all have a scikit-learn-like interface and behave as transformers. The class FeatureCreation wraps a list of features together and returns the X and y matrices from a time series given as input. @@ -136,5 +134,61 @@ Specifically, giotto-time includes ExponentialTrend, PolynomialTrend model class Before the detrending tranformer, a clear quadratic trend is present in the data. For additional information on trend stationarity, see: Trend stationarity: Wikipedia - https://en.wikipedia.org/wiki/Trend_stationary. +Custom Regressors +================= + +LinearRegressor is a linear regressor class that minimizes a custom loss function (compatible with all scikit-learn metrics). + +.. raw:: html + +

+ +

+ +In time series forecasting, it can be essential to minimize error metrics other than the standard R squared. Using this regressor class, it is possible to fit smape, max error and a range of other time series forecasting metrics easily with a simple interface via the GAR class. + +>>> from giottotime.models.regressors.linear_regressor import LinearRegressor +>>> from giottotime.loss_functions import max_error +>>> import numpy as np +>>> import pandas as pd +>>> X = np.random.random((100, 10)) +>>> y = np.random.random(100) +>>> lr = LinearRegressor(loss=max_error) +>>> X_train, y_train = X[:90], y[:90] +>>> X_test, y_test = X[90:], y[90:] +>>> x0 = [0]*11 +>>> lr.fit(X_train, y_train, x0=x0) +>>> y_pred = lr.predict(X_test) + +Causality Tests +=============== + +We provide two tests: ShiftedLinearCoefficient and ShiftedPearsonCorrelation. + +These tests (which are implemented as scikit-learn compatible transformers) determine which shift of each time series maximizes the correlation to each other input time series. This is a very similar construction to the Granger test. + +An example use is shown below. + +>>> from giottotime.causality_tests.shifted_linear_coefficient import ShiftedLinearCoefficient +>>> import pandas.util.testing as testing +>>> data = testing.makeTimeDataFrame(freq="s") +>>> slc = ShiftedLinearCoefficient(target_col="A") +>>> slc.fit(data) +>>> slc.best_shifts_ +y A B C D +x +A 3 6 8 5 +B 9 9 4 1 +C 8 2 4 9 +D 3 9 4 3 +>>> slc.max_corrs_ +y A B C D +x +A 0.460236 0.420005 0.339370 0.267143 +B 0.177856 0.300350 0.367150 0.550490 +C 0.484860 0.263036 0.456046 0.251342 +D 0.580068 0.344688 0.253626 0.256220 + +The target_col input variable to the constructor is used in the transform method. It determines which set of shifts is applied to all inputs. For example, if 'A' is selected, each column will be transformed by a shift corresponding to the 'A' row of the best_shifts_ pivot table. 
diff --git a/examples/README.rst b/examples/README.rst new file mode 100644 index 0000000..51189c4 --- /dev/null +++ b/examples/README.rst @@ -0,0 +1,27 @@ +.. image:: https://www.giotto.ai/static/vector/logo.svg + :width: 850 + +Examples and Tutorials +====================== + +In this folder you can find basic tutorials and examples: you can read through them to +understand how `giotto-time` works. + +Quick start +----------- + +This tutorial is about giving an overview on the basic features of `giotto-time`. +You will learn how to train a simple time series model with custom features. +Some considerations on input-output are presented. + +Details and advanced features +----------------------------- + +This tutorial details more advanced features of `giotto-time`. +You will learn more details on the feature generation and on custom linear regressor +model for time series forecasting. + +Causality Tests +--------------- + +This tutorial details the causality tests built-in in `giotto-time`. diff --git a/examples/quick-start.ipynb b/examples/quick-start.ipynb new file mode 100644 index 0000000..ad099c0 --- /dev/null +++ b/examples/quick-start.ipynb @@ -0,0 +1,580 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:34:45.079040Z", + "start_time": "2019-12-19T10:34:45.075853Z" + } + }, + "source": [ + "# Giotto-Time\n", + "\n", + "Welcome to `giotto-time`, our new library for time series forecasting!\n", + "\n", + "Let's start with an example." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:37:13.829605Z", + "start_time": "2019-12-19T10:37:13.827033Z" + } + }, + "source": [ + "## First example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:51:37.701263Z", + "start_time": "2019-12-19T10:51:37.698686Z" + } + }, + "source": [ + "### Ingredients" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:43:03.249232Z", + "start_time": "2019-12-19T10:43:03.244743Z" + } + }, + "source": [ + "These are the main ingredients of `giotto-time`:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:40.433188Z", + "start_time": "2019-12-20T11:08:39.863805Z" + } + }, + "outputs": [], + "source": [ + "from giottotime.time_series_preparation import TimeSeriesPreparation\n", + "from giottotime.feature_creation import FeatureCreation, ShiftFeature, MovingAverageFeature\n", + "from giottotime.model_selection import FeatureSplitter\n", + "from giottotime.models import GAR" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:43:23.300668Z", + "start_time": "2019-12-19T10:43:23.100775Z" + } + }, + "source": [ + "- `TimeSeriesPreparation`: checks the input format of the time series and converts it to the expected format.\n", + "- `FeatureCreation`, `ShiftFeature`, `MovingAverageFeature`: create the desired features on the time series that are \n", + " used for the forecasting.\n", + "- `FeatureSplitter`: prepares the custom `giotto-time` train-test matrices that are used in the model\n", + "- `GAR`: generalized-auto-regressive model. This is the only time series model that we provide for the first release." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:50:42.797962Z", + "start_time": "2019-12-19T10:50:42.792529Z" + } + }, + "source": [ + "We also need a `scikit-learn` model. We go for a standard linear regressor for this example" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:41.268423Z", + "start_time": "2019-12-20T11:08:41.265378Z" + } + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:51:15.298065Z", + "start_time": "2019-12-19T10:51:15.295733Z" + } + }, + "source": [ + "### Data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T10:55:55.362286Z", + "start_time": "2019-12-19T10:55:55.358045Z" + } + }, + "source": [ + "We use the `pandas.util.testing` module to create a testing time series" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:42.074384Z", + "start_time": "2019-12-20T11:08:42.070697Z" + } + }, + "outputs": [], + "source": [ + "def test_time_series():\n", + " from pandas.util import testing as testing\n", + " \n", + " testing.N, testing.K = 500, 1\n", + " df = testing.makeTimeDataFrame( freq=\"D\" )\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:42.366492Z", + "start_time": "2019-12-20T11:08:42.361791Z" + } + }, + "outputs": [], + "source": [ + "time_series = test_time_series()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T13:37:09.941132Z", + "start_time": "2019-12-19T13:37:09.938476Z" + } + }, + "source": [ + "### Time Series Preparation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"ExecuteTime": { + "end_time": "2019-12-19T13:37:33.357619Z", + "start_time": "2019-12-19T13:37:33.347192Z" + } + }, + "source": [ + "The input time series has to be a `pandas.DataFrame` with a `PeriodIndex`. Use the provided class `TimeSeriesPreparation` to convert the time series in this format" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:43.161252Z", + "start_time": "2019-12-20T11:08:43.158360Z" + } + }, + "outputs": [], + "source": [ + "time_series_preparation = TimeSeriesPreparation()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:43.428293Z", + "start_time": "2019-12-20T11:08:43.421929Z" + } + }, + "outputs": [], + "source": [ + "period_index_time_series = time_series_preparation.transform(time_series)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T14:15:31.332440Z", + "start_time": "2019-12-19T14:15:31.322583Z" + } + }, + "source": [ + "### Feature Creation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T14:20:25.312078Z", + "start_time": "2019-12-19T14:20:25.307741Z" + } + }, + "source": [ + "The feature creation part is one of the core part of our library and the bridge between traditional time series forecasting techniques and machine learning.\n", + "\n", + "Starting with a time series in a `pandas.DataFrame`, we create two matrices `X` and `y` which can be used for training and testing." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T14:49:59.729021Z", + "start_time": "2019-12-19T14:49:59.718573Z" + } + }, + "source": [ + "We provide 12 different features. For simplicity we train a model using only `ShiftFeature` and `MovingAverageFeature`. \n", + "\n", + "`ShiftFeature` provides a temporal shift of the time series. 
Adding two `ShiftFeature` with shifts 1 and 2 is equivalent to an `AR(2)` model. \n", + "\n", + "The possibility to add the features that you want allows you to choose the model that best fits your data." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:44.450001Z", + "start_time": "2019-12-20T11:08:44.446647Z" + } + }, + "outputs": [], + "source": [ + "features = [\n", + " ShiftFeature(1, output_name='shift_1'),\n", + " ShiftFeature(2, output_name='shift_2'),\n", + " MovingAverageFeature(3, output_name='moving_average_3'),\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:44.737915Z", + "start_time": "2019-12-20T11:08:44.734648Z" + } + }, + "outputs": [], + "source": [ + "feature_creation = FeatureCreation(time_series_features=features, horizon=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:45.045070Z", + "start_time": "2019-12-20T11:08:45.022402Z" + } + }, + "outputs": [], + "source": [ + "features_X, features_y = feature_creation.fit_transform(period_index_time_series)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T14:42:32.549572Z", + "start_time": "2019-12-19T14:42:32.547124Z" + } + }, + "source": [ + "### Train-Test split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T10:59:18.112521Z", + "start_time": "2019-12-20T10:59:18.108823Z" + } + }, + "source": [ + "We use `FeatureSplitter` to split the matrices X and y in train and test. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:45.885739Z", + "start_time": "2019-12-20T11:08:45.882557Z" + } + }, + "outputs": [], + "source": [ + "feature_splitter = FeatureSplitter()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:46.240108Z", + "start_time": "2019-12-20T11:08:46.221414Z" + } + }, + "outputs": [], + "source": [ + "X_train, y_train, X_test, y_test = feature_splitter.transform(features_X, features_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-19T14:44:02.820817Z", + "start_time": "2019-12-19T14:44:02.818276Z" + } + }, + "source": [ + "### Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:01:12.922844Z", + "start_time": "2019-12-20T11:01:12.919591Z" + } + }, + "source": [ + "We provide a `GAR` (Generalized Auto Regressive) model to forecast the time series.\n", + "\n", + "The traditional `AR` model is equivalent to our `GAR` model that uses only `ShiftFeature` columns in the `X` matrix.\n", + "`GAR` supports all the features compatible with the feature creation step.\n", + "\n", + "Moreover, `GAR` internally uses a `scikit-learn` compatible model for the internal time series regression. \n", + "In this example we use `LinearRegression`. A priori all the `fit-transform-predict` models are compatible (e.g. ridge regression, random forest, boosting, etc.. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:47.555831Z", + "start_time": "2019-12-20T11:08:47.553017Z" + } + }, + "outputs": [], + "source": [ + "model = GAR(base_model=LinearRegression())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:48.059122Z", + "start_time": "2019-12-20T11:08:48.050062Z" + } + }, + "outputs": [], + "source": [ + "model = model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:01:01.280526Z", + "start_time": "2019-12-20T11:01:01.278125Z" + } + }, + "source": [ + "### Forecasting" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:10:02.544672Z", + "start_time": "2019-12-20T11:10:02.540859Z" + } + }, + "source": [ + "We forecast 3 time steps of the time series (we set this parameter in `FeatureCreation`).\n", + "\n", + "The format of the output is the following:\n", + "- the index is the step at which the prediction is made.\n", + "- the column `y_1` is the prediction one time step after and so on for `y_2` and `y_3`" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:48.939181Z", + "start_time": "2019-12-20T11:08:48.931145Z" + } + }, + "outputs": [], + "source": [ + "predictions = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2019-12-20T11:08:50.014625Z", + "start_time": "2019-12-20T11:08:49.989948Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
y_1y_2y_3
2001-05-12-0.149298-0.164899-0.092473
2001-05-13-0.150681-0.085710-0.063871
2001-05-14-0.066199-0.134353-0.095745
\n", + "
" + ], + "text/plain": [ + " y_1 y_2 y_3\n", + "2001-05-12 -0.149298 -0.164899 -0.092473\n", + "2001-05-13 -0.150681 -0.085710 -0.063871\n", + "2001-05-14 -0.066199 -0.134353 -0.095745" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/giottotime/_version.py b/giottotime/_version.py index 4bd8f92..363eee3 100644 --- a/giottotime/_version.py +++ b/giottotime/_version.py @@ -21,4 +21,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.1a0" +__version__ = "0.1.0" diff --git a/giottotime/time_series_preparation/__init__.py b/giottotime/time_series_preparation/__init__.py index de21fa1..d59471a 100755 --- a/giottotime/time_series_preparation/__init__.py +++ b/giottotime/time_series_preparation/__init__.py @@ -1,6 +1,6 @@ """ The :mod:`giottotime.feature_creation` module deals with the preparation of time series -data, such as index conversions and resampling. +data, such as conversion to `pandas.DataFrame` with a `PeriodIndex`. 
""" from .time_series_conversion import ( diff --git a/giottotime/time_series_preparation/tests/test_time_series_conversion.py b/giottotime/time_series_preparation/tests/test_time_series_conversion.py index f23265a..884eb5d 100644 --- a/giottotime/time_series_preparation/tests/test_time_series_conversion.py +++ b/giottotime/time_series_preparation/tests/test_time_series_conversion.py @@ -332,6 +332,20 @@ def test_only_timedelta_index_as_input(self, timedelta_index_series: pd.Series): ) assert_series_equal(computed_series, expected_series) + def test_basic_timedelta_index_as_input(self): + timedelta_index_series = pd.Series( + index=pd.timedelta_range(start=pd.Timedelta(days=1), freq="10D", periods=3), + data=[1, 2, 3], + ) + expected_series = pd.Series( + index=pd.PeriodIndex(["1970-01-02", "1970-01-12", "1970-01-22"], freq="D"), + data=[1, 2, 3], + ) + computed_series = transform_time_index_series_into_period_index_series( + timedelta_index_series + ) + assert_series_equal(computed_series, expected_series) + @given(series_with_timedelta_index(), available_freqs()) def test_timedelta_index_and_freq_as_input( self, timedelta_index_series: pd.Series, freq: pd.Timedelta diff --git a/giottotime/time_series_preparation/tests/test_time_series_preparation.py b/giottotime/time_series_preparation/tests/test_time_series_preparation.py index 33e8821..c455320 100644 --- a/giottotime/time_series_preparation/tests/test_time_series_preparation.py +++ b/giottotime/time_series_preparation/tests/test_time_series_preparation.py @@ -128,6 +128,39 @@ def test_wrong_input_type(self, wrong_input: Tuple): with pytest.raises(TypeError): time_series_preparation._to_time_index_series(wrong_input) + @given(series_with_period_index(), st.datetimes(), available_freqs()) + def test_period_index_dataframe_unchanged( + self, period_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta, + ): + period_index_dataframe = pd.DataFrame(period_index_series) + time_series_preparation = 
TimeSeriesPreparation(start=start, freq=freq) + computed_time_series = time_series_preparation._to_time_index_series( + period_index_dataframe + ) + assert_series_equal(computed_time_series, period_index_series) + + @given(series_with_datetime_index(), st.datetimes(), available_freqs()) + def test_datetime_index_dataframe_unchanged( + self, datetime_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta, + ): + datetime_index_dataframe = pd.DataFrame(datetime_index_series) + time_series_preparation = TimeSeriesPreparation(start=start, freq=freq) + computed_time_series = time_series_preparation._to_time_index_series( + datetime_index_dataframe + ) + assert_series_equal(computed_time_series, datetime_index_series) + + @given(series_with_timedelta_index(), st.datetimes(), available_freqs()) + def test_timedelta_index_dataframe_unchanged( + self, timedelta_index_series: pd.Series, start: pd.datetime, freq: pd.Timedelta, + ): + timedelta_index_dataframe = pd.DataFrame(timedelta_index_series) + time_series_preparation = TimeSeriesPreparation(start=start, freq=freq) + computed_time_series = time_series_preparation._to_time_index_series( + timedelta_index_dataframe + ) + assert_series_equal(computed_time_series, timedelta_index_series) + class TestToEquispacedTimeSeries: @given( diff --git a/giottotime/time_series_preparation/time_series_conversion.py b/giottotime/time_series_preparation/time_series_conversion.py index bf0c2f1..cd04546 100644 --- a/giottotime/time_series_preparation/time_series_conversion.py +++ b/giottotime/time_series_preparation/time_series_conversion.py @@ -186,6 +186,19 @@ class SequenceToTimeIndexSeries(TimeSeriesConversion): frequency of the output time series. Not mandatory for all time series conversion. 
+ Examples + -------- + >>> from giottotime.time_series_preparation import SequenceToTimeIndexSeries + >>> time_series = [1,2,3,5,5,7] + >>> sequence_to_time_index = SequenceToTimeIndexSeries(start='01-01-2010', freq='10D') + >>> sequence_to_time_index.transform(time_series) + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + Freq: 10D, dtype: int64 """ def __init__( @@ -209,7 +222,7 @@ def _get_values_from( class PandasSeriesToTimeIndexSeries(TimeSeriesConversion): """Returns a Pandas Series with time index (DatetimeIndex, TimedeltaIndex or - PeriodIndex from a standard Pandas Series + PeriodIndex) from a standard Pandas Series Parameters ---------- @@ -222,6 +235,20 @@ class PandasSeriesToTimeIndexSeries(TimeSeriesConversion): freq : pd.Timedelta``, optional, default: ``None`` The frequency of the time series. + Examples + -------- + >>> import pandas as pd + >>> from giottotime.time_series_preparation import PandasSeriesToTimeIndexSeries + >>> time_series = pd.Series([1,2,3,5,5,7]) + >>> sequence_to_time_index = PandasSeriesToTimeIndexSeries(start='01-01-2010', freq='10D') + >>> sequence_to_time_index.transform(time_series) + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + Freq: 10D, dtype: int64 """ def __init__( @@ -255,7 +282,8 @@ def _has_time_index(self, time_series: pd.Series) -> bool: class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion): - """Converts a series with a time index to a series with a PeriodIndex. + """Converts a series with a time index (DatetimeIndex, TimedeltaIndex or + PeriodIndex) to a series with a PeriodIndex. It may be necessary to specify a `freq` if not already provided. @@ -264,6 +292,47 @@ class TimeIndexSeriesToPeriodIndexSeries(TimeSeriesConversion): freq : pd.Timedelta, optional, default: ``None`` The frequency of the time series. 
+ Examples + -------- + >>> import pandas as pd + >>> from giottotime.time_series_preparation import TimeIndexSeriesToPeriodIndexSeries + >>> period_index_time_series = pd.Series( + ... index = pd.period_range(start='01-01-2010', freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> datetime_index_time_series = pd.Series( + ... index = pd.date_range(start='01-01-2010', freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> timedelta_index_time_series = pd.Series( + ... index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> sequence_to_time_index = TimeIndexSeriesToPeriodIndexSeries() + >>> sequence_to_time_index.transform(period_index_time_series) + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + Freq: 10D, dtype: int64 + >>> sequence_to_time_index.transform(datetime_index_time_series) + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + Freq: 10D, dtype: int64 + >>> sequence_to_time_index.transform(timedelta_index_time_series) + 1970-01-02 1 + 1970-01-12 2 + 1970-01-22 3 + 1970-02-01 5 + 1970-02-11 5 + 1970-02-21 7 + Freq: D, dtype: int64 """ def __init__(self, freq: Optional[pd.Timedelta] = None): diff --git a/giottotime/time_series_preparation/time_series_preparation.py b/giottotime/time_series_preparation/time_series_preparation.py index 2e909e6..d5685fe 100644 --- a/giottotime/time_series_preparation/time_series_preparation.py +++ b/giottotime/time_series_preparation/time_series_preparation.py @@ -51,6 +51,55 @@ class TimeSeriesPreparation: ValueError Of the three parameters: start, end, and periods, exactly two must be specified. + Examples + -------- + >>> time_series = [1,2,3,5,5,7] + >>> period_index_time_series = pd.Series( + ... index = pd.period_range(start='01-01-2010', freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> datetime_index_time_series = pd.Series( + ... 
index = pd.date_range(start='01-01-2010', freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> timedelta_index_time_series = pd.Series( + ... index = pd.timedelta_range(start=pd.Timedelta(days=1), freq='10D', periods=6), + ... data=[1,2,3,5,5,7] + ... ) + >>> time_series_preparation = TimeSeriesPreparation() + >>> time_series_preparation.transform(time_series) + time_series + 1970-01-01 1 + 1970-01-02 2 + 1970-01-03 3 + 1970-01-04 5 + 1970-01-05 5 + 1970-01-06 7 + >>> time_series_preparation.transform(period_index_time_series) + time_series + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + >>> time_series_preparation.transform(datetime_index_time_series) + time_series + 2010-01-01 1 + 2010-01-11 2 + 2010-01-21 3 + 2010-01-31 5 + 2010-02-10 5 + 2010-02-20 7 + >>> time_series_preparation.transform(timedelta_index_time_series) + time_series + 1970-01-02 1 + 1970-01-12 2 + 1970-01-22 3 + 1970-02-01 5 + 1970-02-11 5 + 1970-02-21 7 + """ def __init__( @@ -78,13 +127,15 @@ def __init__( self.freq ) - def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFrame: + def transform( + self, time_series: Union[List, np.array, pd.Series, pd.DataFrame] + ) -> pd.DataFrame: """Transforms an array-like sequence in a period-index DataFrame with a single column. Parameters ---------- - time_series : Union[List, np.array, pd.Series], required + time_series : Union[List, np.array, pd.Series, pd.DataFrame], required The input time series. 
Returns @@ -104,9 +155,11 @@ def transform(self, time_series: Union[List, np.array, pd.Series]) -> pd.DataFra return period_index_dataframe def _to_time_index_series( - self, array_like_object: Union[List, np.array, pd.Series] + self, array_like_object: Union[List, np.array, pd.Series, pd.DataFrame] ) -> pd.Series: - if isinstance(array_like_object, pd.Series): + if isinstance(array_like_object, pd.DataFrame): + return self.pandas_converter.transform(array_like_object.iloc[:, 0]) + elif isinstance(array_like_object, pd.Series): return self.pandas_converter.transform(array_like_object) elif any( isinstance(array_like_object, type_) for type_ in SUPPORTED_SEQUENCE_TYPES