From f8d87ea64b5a8ab93935c4c6962ac718c77da280 Mon Sep 17 00:00:00 2001 From: jvdd Date: Wed, 15 May 2024 13:09:11 +0200 Subject: [PATCH] :tada: allow non datetimeindex for processing --- tsflex/processing/series_pipeline.py | 3 +-- tsflex/processing/series_processor.py | 8 ++++---- tsflex/utils/data.py | 10 +++++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tsflex/processing/series_pipeline.py b/tsflex/processing/series_pipeline.py index c67a359..052396d 100644 --- a/tsflex/processing/series_pipeline.py +++ b/tsflex/processing/series_pipeline.py @@ -194,8 +194,7 @@ def process( for s in to_series_list(data): # Assert the assumptions we make! if len(s): - assert isinstance(s.index, pd.DatetimeIndex) - # TODO: also check monotonic increasing? + assert s.index.is_monotonic_increasing if s.name in self.get_required_series(): series_dict[str(s.name)] = s.copy() if copy else s diff --git a/tsflex/processing/series_processor.py b/tsflex/processing/series_processor.py index 4f5253a..e08c1ec 100644 --- a/tsflex/processing/series_processor.py +++ b/tsflex/processing/series_processor.py @@ -321,9 +321,9 @@ def _handle_seriesprocessor_func_output( # Nothing has to be done! A pd.DataFrame can be added to a series_dict using # series_dict.update(df) # Note: converting this to a dictionary (to_dict()) is **very** inefficient! - # Assert that the DataFrame has a time-index + # Assert that the DataFrame has a time-index or a range-index if len(func_output): - assert isinstance(func_output.index, pd.DatetimeIndex) + assert isinstance(func_output.index, (pd.DatetimeIndex, pd.RangeIndex)) # Assert that the DataFrame columns are named assert all( func_output.columns.values != [i for i in range(func_output.shape[1])] @@ -334,9 +334,9 @@ def _handle_seriesprocessor_func_output( # Convert series to series_dict and return # => if func_output.name is in the required_dict, than the original series will # be replaced by this new series. - # Assert that the series has a time-index + # Assert that the series has a time-index or a range-index if len(func_output): - assert isinstance(func_output.index, pd.DatetimeIndex) + assert isinstance(func_output.index, (pd.DatetimeIndex, pd.RangeIndex)) # Assert (func_output.name is not None) | (len(required_dict) == 1) if func_output.name is None: # If a series without a name is returned that is constructed from just 1 diff --git a/tsflex/utils/data.py b/tsflex/utils/data.py index 35fcea5..6155b30 100644 --- a/tsflex/utils/data.py +++ b/tsflex/utils/data.py @@ -42,9 +42,17 @@ def series_dict_to_df(series_dict: Dict[str, pd.Series]) -> pd.DataFrame: return pd.DataFrame(series_dict) # 1. Check if the time-indexes of the series are equal, to create the df efficiently try: + + def _get_index_freq(index): # type: ignore[no-untyped-def] + if isinstance(index, pd.DatetimeIndex): + return index.freq + elif isinstance(index, pd.RangeIndex): + return index.step + return None + index_info = set( [ - (s.index[0], s.index[-1], len(s), s.index.freq) + (s.index[0], s.index[-1], len(s), _get_index_freq(s.index)) for s in series_dict.values() ] )