
Commit

Revert "Merge pull request #802 from AIStream-Peelout/multimodal_models"
This reverts commit df94e25, reversing
changes made to 1b2c801.
isaacmg committed Oct 8, 2024
1 parent df94e25 commit 82d4d08
Showing 7 changed files with 46 additions and 52 deletions.
18 changes: 12 additions & 6 deletions .idea/workspace.xml

Some generated files are not rendered by default.

2 changes: 0 additions & 2 deletions docs/requirements.txt
@@ -4,10 +4,8 @@ sphinx-autodoc-typehints
pandas
numpy
torch
matplotlib
plotly
google-cloud-storage
scikit-learn
wandb
shap
einops
2 changes: 1 addition & 1 deletion docs/source/basic_utils.rst
@@ -1,5 +1,5 @@
Basic Google Cloud Platform Utilities
=====================================
================

Flow Forecast natively integrates with Google Cloud Platform.

2 changes: 1 addition & 1 deletion docs/source/crossformer.rst
@@ -1,4 +1,4 @@
Crossformer
=========================
.. automodule:: flood_forecast.transformer_xl.cross_former
.. automodule:: flood_forecast.transformer_xl.crossformer
:members:
2 changes: 1 addition & 1 deletion docs/source/custom_opt.rst
@@ -1,5 +1,5 @@
Custom Optimizers and more
==========================
====================

.. automodule:: flood_forecast.custom.custom_opt
:members:
2 changes: 1 addition & 1 deletion docs/source/explain_model_output.rst
@@ -1,5 +1,5 @@
Explain Model Output
====================
=================

.. automodule:: flood_forecast.explain_model_output
:members:
70 changes: 30 additions & 40 deletions flood_forecast/preprocessing/pytorch_loaders.py
@@ -2,7 +2,7 @@
import numpy as np
import pandas as pd
import torch
from typing import Dict, Tuple, Union, Optional, List
from typing import Dict, Tuple, Union, List
from flood_forecast.pre_dict import interpolate_dict
from flood_forecast.preprocessing.buil_dataset import get_data
from datetime import datetime
@@ -21,17 +21,15 @@ def __init__(
scaling=None,
start_stamp: int = 0,
end_stamp: int = None,
gcp_service_key: Optional[str] = None,
interpolate_param: bool = False,
sort_column=None,
scaled_cols=None,
feature_params=None,
no_scale=False,
preformatted_df=False

):
"""
A data loader that takes a CSV file and properly batches for use in training/eval a PyTorch model
"""A data loader that takes a CSV file and properly batches for use in training/eval a PyTorch model.
:param file_path: The path to the CSV file you wish to use (GCS compatible) or a Pandas dataframe.
:param forecast_history: This is the length of the historical time series data you wish to
utilize for forecasting
@@ -42,10 +40,12 @@ def __init__(
:param scaling: (highly recommended) If provided, should be a subclass of sklearn.base.BaseEstimator
and sklearn.base.TransformerMixin (e.g. StandardScaler, MaxAbsScaler, MinMaxScaler). Note that without
a scaler the loss is likely to explode and become infinite, which will corrupt the weights
:param start_stamp int: Optional if you want to only use part of a CSV for training, validation
:param start_stamp: Optional if you want to only use part of a CSV for training, validation
or testing supply these
:param end_stamp int: Optional if you want to only use part of a CSV for training, validation,
or testing supply these
:type start_stamp: int, optional
:param end_stamp: Optional if you want to only use part of a CSV for training, validation,
or testing supply these
:type end_stamp: int, optional
:param sort_column str: The column to sort the time series on prior to forecast.
:param scaled_cols: The columns you want scaling applied to (if left blank will default to all columns)
:param feature_params: These are the datetime features you want to create.
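For orientation, a minimal usage sketch of the loader described above. The target_col and relevant_cols argument names are assumptions not shown in this hunk; the other arguments come from the docstring.

from flood_forecast.preprocessing.pytorch_loaders import CSVDataLoader
from sklearn.preprocessing import StandardScaler

# Hypothetical example; target_col and relevant_cols are assumed argument names.
loader = CSVDataLoader(
    file_path="data/river_flow.csv",          # local path or GCS URI
    forecast_history=30,                       # length of the lookback window
    forecast_length=10,                        # number of steps to forecast
    target_col=["cfs"],                        # assumed argument name
    relevant_cols=["cfs", "precip", "temp"],   # assumed argument name
    scaling=StandardScaler(),                  # highly recommended
    start_stamp=0,
    end_stamp=5000,
    sort_column="datetime",
)
x, y = loader[0]  # history window and forecast window tensors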
@@ -122,13 +122,13 @@ def __len__(self) -> int:
len(self.df.index) - self.forecast_history - self.forecast_length - 1
)

def __sample_and_track_series__(self, idx, series_id=None):
def __sample_and_track_series__(self, idx: int, series_id=None):
pass

def inverse_scale(
self, result_data: Union[torch.Tensor, pd.Series, np.ndarray]
) -> torch.Tensor:
"""Un-does the scaling of the data
"""Un-does the scaling of the data.
:param result_data: The data you want to unscale can handle multiple data types.
:type result_data: Union[torch.Tensor, pd.Series, np.ndarray]
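A brief sketch of the intended round trip, assuming a loader built with a scaler as in the example above: model output lives in scaled space and inverse_scale maps it back to the original units.

import torch

# Hypothetical round trip; scaled_pred stands in for real model output.
scaled_pred = torch.rand(10, 3)
unscaled_pred = loader.inverse_scale(scaled_pred)
print(unscaled_pred.shape)  # torch.Tensor in the original units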
@@ -161,16 +161,16 @@ def inverse_scale(


class CSVSeriesIDLoader(CSVDataLoader):
def __init__(self, series_id_col: str, main_params: dict, return_method: str, return_all=True):
def __init__(self, series_id_col: str, main_params: dict, return_method: str, return_all: bool = True):
"""A data-loader for a CSV file that contains a series ID column.
:param series_id_col: The id
:param series_id_col: The id column of the series you want to forecast.
:type series_id_col: str
:param main_params: The central set of parameters
:type main_params: dict
:param return_method: The method of return
:param return_method: The method of return (e.g. all series at once, one at a time, or a random sample)
:type return_method: str
:param return_all: Whether to return all items, defaults to True
:param return_all: Whether to return all series at once; if set to True, __validate_data__in_df is run to check series lengths. Defaults to True
:type return_all: bool, optional
"""
main_params1 = deepcopy(main_params)
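A hedged construction sketch for this loader: the keys of main_params mirror the CSVDataLoader arguments, and the target_col/relevant_cols keys and the "all" return_method value are assumptions for illustration.

from flood_forecast.preprocessing.pytorch_loaders import CSVSeriesIDLoader

# Hypothetical config; key names mirror CSVDataLoader's parameters.
main_params = {
    "file_path": "data/multi_site.csv",
    "forecast_history": 30,
    "forecast_length": 10,
    "target_col": ["cfs"],                       # assumed key
    "relevant_cols": ["cfs", "precip", "temp"],  # assumed key
}
series_loader = CSVSeriesIDLoader(
    series_id_col="station_id",
    main_params=main_params,
    return_method="all",  # assumed value; only returning all series is implemented
)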
@@ -203,8 +203,7 @@ def __init__(self, series_id_col: str, main_params: dict, return_method: str, re
print("unique dict")

def __validate_data__in_df(self):
"""Makes sure the data in the data-frame is the proper length for each series e
"""
"""Makes sure the data in the data-frame is the proper length for each series."""
if self.return_all_series:
len_first = len(self.listed_vals[0])
print("Length of first series is:" + str(len_first))
@@ -231,7 +230,6 @@ def __getitem__(self, idx: int) -> Tuple[Dict, Dict]:
targ_list = {}
for va in self.listed_vals:
# We need to exclude the index column on one end and the series id column on the other

targ_start_idx = idx + self.forecast_history
idx2 = va[self.series_id_col].iloc[0]
va_returned = va[va.columns.difference([self.series_id_col], sort=False)]
Expand All @@ -241,8 +239,7 @@ def __getitem__(self, idx: int) -> Tuple[Dict, Dict]:
targ_list[self.unique_dict[idx2]] = targ
return src_list, targ_list
else:
raise NotImplementedError
return super().__getitem__(idx)
raise NotImplementedError("Current code only supports returning all the series at once at each iteration")

def __sample_series_id__(idx, series_id):
pass
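As the __getitem__ hunk above shows, with all series returned each index yields one window per series, keyed by the integer assigned to each series id in unique_dict. A hypothetical iteration, continuing the sketch above:

# Hypothetical usage; src and targ are dicts keyed per series.
src, targ = series_loader[0]
for series_key, history in src.items():
    print(series_key, history.shape, targ[series_key].shape)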
@@ -267,8 +264,12 @@ def __init__(
**kwargs
):
"""
:param str df_path: The path to the CSV file you want to use (GCS compatible) or a Pandas DataFrame
A data loader for the test data.
A data loader for the test data and plotting code; it is a subclass of CSVDataLoader.
:param str df_path: The path to the CSV file you want to use (GCS compatible) or a Pandas DataFrame.
:type df_path: str
:param int forecast_total: The total length of the forecast.
:type forecast_total: int
"""
if "file_path" not in kwargs:
kwargs["file_path"] = df_path
Expand All @@ -283,8 +284,8 @@ def __init__(
print(df_path)
self.forecast_total = forecast_total
# TODO these are antiquated delete them
self.use_real_temp = use_real_temp
self.use_real_precip = use_real_precip
self.use_real_temp = use_real_temp
self.target_supplied = target_supplied
# Convert back to datetime and save index
sort_col1 = sort_column_clone if sort_column_clone else "datetime"
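A minimal sketch of how the test loader is typically built; the extra keyword arguments are forwarded to the parent CSVDataLoader, and names beyond df_path and forecast_total are assumptions.

from flood_forecast.preprocessing.pytorch_loaders import CSVTestLoader

# Hypothetical example; kwargs are forwarded to CSVDataLoader.
test_loader = CSVTestLoader(
    df_path="data/river_flow_test.csv",
    forecast_total=48,                         # total horizon to evaluate
    forecast_history=30,                       # parent-class argument (assumed)
    forecast_length=10,                        # parent-class argument (assumed)
    target_col=["cfs"],                        # assumed argument name
    relevant_cols=["cfs", "precip", "temp"],   # assumed argument name
)
history, original_rows, target_start = test_loader[0]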
@@ -309,7 +310,7 @@ def __getitem__(self, idx):
historical_rows = self.df.iloc[idx: self.forecast_history + idx]
target_idx_start = self.forecast_history + idx
# Why aren't we using these
# targ_rows = self.df.iloc[
# targ_rows = self.df.ilo c[
# target_idx_start : self.forecast_total + target_idx_start
# ]
all_rows_orig = self.original_df.iloc[
@@ -319,10 +320,7 @@
return historical_rows.float(), all_rows_orig, target_idx_start

def convert_real_batches(self, the_col: str, rows_to_convert):
"""
A helper function to return properly divided precip and temp
values to be stacked with t forecasted cfs.
"""
"""A helper function to return properly divided precip and temp values to be stacked with t forecasted cfs."""
the_column = torch.from_numpy(rows_to_convert[the_col].to_numpy())
chunks = [
the_column[
@@ -335,8 +333,7 @@ def convert_real_batches(self, the_col: str, rows_to_convert):
def convert_history_batches(
self, the_col: Union[str, List[str]], rows_to_convert: pd.DataFrame
):
"""A helper function to return dataframe in batches of
size (history_len, num_features)
"""A helper function to return dataframe in batches of size (history_len, num_features)
Args:
the_col (str): column names
@@ -358,10 +355,6 @@ def __len__(self) -> int:
)


class TestLoaderABC(CSVTestLoader):
pass


class AEDataloader(CSVDataLoader):
def __init__(
self,
@@ -376,9 +369,8 @@ def __init__(
forecast_history=1,
no_scale=True,
sort_column=None):
"""A data loader class for autoencoders. Overrides __len__ and __getitem__ from generic dataloader.
Also defaults forecast_history and forecast_length to 1. Since AE will likely only use one row.
Same parameters as before.
"""A data loader class for autoencoders. Overrides __len__ and __getitem__ from generic dataloader. Also defaults
forecast_history and forecast_length to 1. Since AE will likely only use one row. Same parameters as before.
:param file_path: The path to the file
:type file_path: str
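Since the autoencoder loader works one row at a time, a hedged construction sketch is short; relevant_cols is an assumed argument name not shown in this hunk.

from flood_forecast.preprocessing.pytorch_loaders import AEDataloader

# Hypothetical example; forecast_history and forecast_length default to 1,
# so each item is effectively a single row.
ae_loader = AEDataloader(
    file_path="data/river_flow.csv",
    relevant_cols=["cfs", "precip", "temp"],  # assumed argument name
)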
@@ -597,15 +589,14 @@ def __getitem__(self, idx):
class VariableSequenceLength(CSVDataLoader):
def __init__(self, series_marker_column: str, csv_loader_params: Dict, pad_length=None, task="classification",
n_classes=9 + 90):
"""Enables eas(ier) loading of time-series with variable length data
"""Enables eas(ier) loading of time-series with variable length data.
:param series_marker_column: The column that delineates when an example begins and ends
:type series_marker_column: str
:param pad_length: If specified, the length to truncate sequences at or pad them to
:type pad_length: int
:param task: The specific task (e.g. classification, forecasting, auto_encode)
:type task: str
"""
super().__init__(**csv_loader_params)
self.pad_length = pad_length
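A hedged usage sketch for the variable-length loader: csv_loader_params is forwarded to CSVDataLoader, and its target_col/relevant_cols keys are assumed names for illustration.

from flood_forecast.preprocessing.pytorch_loaders import VariableSequenceLength

# Hypothetical config passed through to CSVDataLoader.
var_loader = VariableSequenceLength(
    series_marker_column="run_id",       # column marking where each example starts/ends
    csv_loader_params={
        "file_path": "data/events.csv",
        "forecast_history": 1,
        "forecast_length": 1,
        "target_col": ["label"],                   # assumed key
        "relevant_cols": ["label", "feat_1", "feat_2"],  # assumed key
    },
    pad_length=128,                      # truncate or pad every sequence to 128 steps
    task="classification",
)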
@@ -645,8 +636,7 @@ def get_item_auto_encoder(self, idx):
return the_seq.float(), the_seq.float()

def pad_input_data(self, sequence: int):
"""Pads a sequence to a specified length.
"""
"""Pads a sequence to a specified length."""
if self.pad_length > sequence.shape[0]:
pad_dim = self.pad_length - sequence.shape[0]
return torch.nn.functional.pad(sequence, (0, 0, 0, pad_dim))
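For reference, the pad call above appends zero-filled rows along the time dimension; a standalone illustration with a pad_length of 8:

import torch
import torch.nn.functional as F

seq = torch.randn(5, 3)                   # (time_steps, n_features)
pad_dim = 8 - seq.shape[0]                # rows needed to reach pad_length = 8
padded = F.pad(seq, (0, 0, 0, pad_dim))   # (0, 0) keeps features, (0, pad_dim) appends zero rows
print(padded.shape)                       # torch.Size([8, 3])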
