68 rnn integration to gl #81

Closed
wants to merge 17 commits

Commits (17)
8da6669
Create a new preprocessor to handle data directly coming from GL. Cha…
chyalexcheng Feb 1, 2024
e6644f4
add a nonlinear regression tutorial
chyalexcheng Feb 3, 2024
8235eca
add a rnn example with nonlinear function
chyalexcheng Feb 3, 2024
3da2496
change some naming to make the parameters more general. Update the hy…
chyalexcheng Feb 7, 2024
1bb2cd9
fix errors of rnn selftests
chyalexcheng Feb 7, 2024
54ee30b
implement a function to create and update input data for rnn.predict_…
chyalexcheng Feb 7, 2024
076229e
first working example of mixed use of data-driven and equation-based …
chyalexcheng Feb 7, 2024
05be8a6
rewrite a cleaner version of hyperbola_calibration_mixed.py
chyalexcheng Feb 7, 2024
0e4e0c4
change name in models.py from 'contact_parameters' to 'params'
chyalexcheng Feb 12, 2024
502afec
add a correct example of hyperparameter tuning using wandb
chyalexcheng Feb 15, 2024
ca9053d
move hypertuning into the rnn.train module
chyalexcheng Feb 15, 2024
e0f1474
update the rnn tutorial
chyalexcheng Feb 15, 2024
73733a2
setting hypersearch range must be done after setting the default valu…
chyalexcheng Feb 15, 2024
5b92bd6
fix examples of parameter sweep using wandb
chyalexcheng Feb 19, 2024
291600d
adapt rnn documentation
chyalexcheng Feb 19, 2024
37e6ec1
add hyperparameter tuning to GL workflow. Got error 400 response exec…
chyalexcheng Feb 19, 2024
9e85527
resolve issue #71 by excluding input, parameter, and output arrays fr…
chyalexcheng Feb 20, 2024
152 changes: 98 additions & 54 deletions docs/source/rnn.rst
@@ -4,12 +4,26 @@ RNN Module
We implemented a `Recurrent Neural Network (RNN) <https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-recurrent-neural-networks>`_ model
in the TensorFlow framework. For more information about the model, see the section `The RNN model`_.

There are four main usages of the RNN module:
There are three main usages of the RNN module (click on the links to download the scripts):

1. `Train an RNN with your own data`_.
2. `Make a prediction with a pre-trained model`_.
3. `Use a trained RNN model in grainLearning calibration process`_.


We have made one tutorial that demonstrates
how to combine :download:`training and predicting <../../tutorials/data_driven/LSTM/train_predict_rnn.py>` of ML models within one script,
and another that demonstrates :download:`hyperparameter optimization <../../tutorials/data_driven/LSTM/train_predict_rnn_hypertuning.py>`
before making predictions.

We provide two simple tutorials that demonstrate how to:

- use an LSTM model that mimics hyperbolic curves in the GL Bayesian calibration workflow
  (click :download:`here <../../tutorials/data_driven/LSTM/hyperbola_calibration_lstm.py>`),
- and partly replace the closed-form solution of the hyperbolic model (click :download:`here <../../tutorials/data_driven/LSTM/hyperbola_calibration_mixed.py>`) with the LSTM,

in addition to the example (click :download:`here <../../tutorials/data_driven/LSTM/rnn_GL_calibration.ipynb>`) where
an LSTM is pretrained to predict the triaxial response of a granular material and then used in the calibration process.

Train an RNN with your own data
-------------------------------

@@ -129,7 +143,7 @@ Create `my_train.py` where you would like to run the training. Be aware to confi
preprocessor_TC = preprocessor.PreprocessorTriaxialCompression(**my_config)

# 3. Run the training with TensorFlow and report to wandb
train_rnn.train(preprocessor_TC, config=my_config)
history_wandb = train_rnn.train(preprocessor_TC, config=my_config)

Open a terminal where you have your file, activate the environment where the grainLearning and rnn dependencies have been installed, and run: ``python my_train.py``
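The training call in step 3 returns the training history (hence the ``history_wandb`` variable above). Assuming it behaves like a Keras ``History`` object (an assumption, check your installed version), you can inspect the loss curves afterwards:

.. code-block:: python

    from matplotlib import pyplot as plt

    # 'loss' and 'val_loss' are the standard Keras history keys
    plt.plot(history_wandb.history['loss'], label='training loss')
    plt.plot(history_wandb.history['val_loss'], label='validation loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.show()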

@@ -155,50 +169,76 @@ Create `my_sweep.py` where you would like to run the training. Configure the swe
.. code-block:: python
:caption: my_sweep.py

import wandb
import grainlearning.rnn.train as train_rnn
from grainlearning.rnn.train import HyperTuning
from grainlearning.rnn import preprocessor

import wandb
import os


def my_training_function():
""" A function that wraps the training process"""
preprocessor_TC = preprocessor.PreprocessorTriaxialCompression(**wandb.config)
train_rnn.train(preprocessor_TC)

if __name__ == '__main__':
wandb.login()
sweep_configuration = {
'method': 'bayes',
'name': 'sweep',
'metric': {'goal': 'maximize', 'name': 'val_acc'},
'parameters':
{
'raw_data': 'my_path_to_dataset.hdf5',
'pressure': 'All',
'experiment_type': 'All',
'add_e0': False,
'add_pressure': True,
'add_experiment_type': True,
'train_frac': 0.7,
'val_frac': 0.15,
'window_size': 10,
'window_step': 1,
'pad_length': 0,
'lstm_units': 200,
'dense_units': 200,
'patience': 5,
'epochs': 100,
'learning_rate': 1e-3,
'batch_size': 256,
'standardize_outputs': True,
'save_weights_only': False
}
}

# create a new sweep, here you can also configure your project and entity.
sweep_id = wandb.sweep(sweep=sweep_configuration)

# run an agent
wandb.agent(sweep_id, function=my_training_function, count=4)
""" A function that wraps the training process"""
# update window_size of my_config from wandb (only needed for the LSTM model)
with wandb.init():
my_config['window_size'] = wandb.config['window_size']
preprocessor_TC = preprocessor.PreprocessorTriaxialCompression(**my_config)
train_rnn.train(preprocessor_TC)


# 1. Create my dictionary of configuration
my_config = {
'raw_data': 'my_path_to_dataset.hdf5',
'pressure': '0.2e6',
'experiment_type': 'drained',
'add_experiment_type': False,
'add_pressure': True,
'add_e0': True,
'train_frac': 0.7,
'val_frac': 0.15,
'window_size': 20,
'pad_length': 10,
'window_step': 1,
'patience': 25,
'epochs': 10,
'learning_rate': 1e-4,
'lstm_units': 250,
'dense_units': 250,
'batch_size': 256,
'standardize_outputs': True,
'save_weights_only': True
}

# 2. Define the sweep configuration
sweep_config = {
'method': 'random',
'metric': {'goal': 'minimize', 'name': 'val_loss'},
'early_terminate': {
'type': 'hyperband',
's': 2,
'eta': 3,
'max_iter': 27
}
}

search_space = {
'learning_rate': {
# a flat distribution between 1e-4 and 1e-2
'distribution': 'q_log_uniform_values',
'q': 1e-4,
'min': 1e-4,
'max': 1e-2
},
'lstm_units': {
'distribution': 'q_log_uniform_values',
'q': 1,
'min': 32,
'max': 256
},
}

# 3. Run the sweep
hyper_tuner = HyperTuning(sweep_config, search_space, my_config, project_name='my_sweep')
hyper_tuner.run_sweep(my_training_function, count=100)

Open a terminal where you have your file, activate the environment where the grainLearning and rnn dependencies have been installed, and run: ``python my_sweep.py``.
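Note that ``my_training_function`` above reads ``window_size`` from ``wandb.config``, so that key needs to be available during the sweep. One way (a sketch; the candidate ``values`` are illustrative assumptions) is to add it to the search space before creating the ``HyperTuning`` object:

.. code-block:: python

    # make wandb.config['window_size'] available to my_training_function
    search_space['window_size'] = {'values': [10, 20, 30]}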

@@ -311,22 +351,24 @@ In this example, we are going to load the same dataset that we used for training

from pathlib import Path

import grainlearning.rnn.predict as predict_rnn
from grainlearning.rnn.predict import predict_batch, plot_metric_distribution
from grainlearning.rnn import preprocessor

# 1. Define the location of the model to use
path_to_trained_model = Path('C:/trained_models/My_model_1')
path_to_trained_model = Path('my_path_to_run_directory')

# 2. Get the model information
model, train_stats, config = predict_rnn.get_pretrained_model(path_to_trained_model)

# 3. Load input data to predict from
config['raw_data'] = '../train/data/my_database.hdf5'
config['raw_data'] = 'my_path_to_dataset.hdf5'
preprocessor_TC = preprocessor.PreprocessorTriaxialCompression(**config)
data, _ = preprocessor_TC.prepare_datasets()

#4. Make a prediction
predictions = predict_rnn.predict_macroscopics(model, data['test'], train_stats, config,batch_size=256, single_batch=True)
# 4. Make a prediction and plot the histogram of errors
predictions = predict_rnn.predict_batch(model, data['test'], train_stats, config, batch_size=len(data['test']))
fig = plot_metric_distribution(data, predictions, config)
fig.show()

If the model was trained with ``standardize_outputs = True``, ``predictions`` will be de-standardized (i.e. not values in [0, 1] but values in the original scale).
In our example, ``predictions`` is a TensorFlow tensor of shape ``(batch_size, length_sequences - window_size, num_labels)``.
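Because the first ``window_size`` steps of each sequence are consumed as input history, the labels must be offset by ``window_size`` before comparing them with ``predictions``. A minimal sketch, assuming the dataset structure used above:

.. code-block:: python

    # align the labels with the predictions by dropping the first window_size steps
    _, labels = next(iter(data['test'].batch(len(data['test']))))
    errors = labels[:, config['window_size']:, :] - predictions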
@@ -345,18 +387,20 @@ Often this looks like `<entity>/<project>/<sweep_id>`.
from grainlearning.rnn import preprocessor

# 1. Define which sweep to look into
entity_project_sweep_id = 'grainlearning-escience/grainLearning-grainlearning_rnn/6zrc0vjb'
entity_project_sweep_id = 'grainlearning/project/sweep_id'

# 2. Choose the best model from a sweep, and get the model information
model, data, train_stats, config = predict_rnn.get_best_run_from_sweep(entity_project_sweep_id)
model, train_stats, config = predict_rnn.get_best_run_from_sweep(entity_project_sweep_id)

# 3. Load input data to predict from
config['raw_data'] = '../train/data/sequences.hdf5'
config['raw_data'] = 'my_path_to_dataset.hdf5'
preprocessor_TC = preprocessor.PreprocessorTriaxialCompression(**config)
data, _ = preprocessor_TC.prepare_datasets()

#4. Make a prediction
predictions = predict_rnn.predict_macroscopics(model, data['test'], train_stats, config,batch_size=256, single_batch=True)
# 4. Make a prediction and plot the histogram of errors
predictions = predict_rnn.predict_batch(model, data['test'], train_stats, config, batch_size=len(data['test']))
fig = plot_metric_distribution(data, predictions, config)
fig.show()

This can fail if you have deleted some runs or if your wandb folder is not present in the current directory. We advise copying `config.yaml`, `train_stats.py` and `model_best.h5` from `wandb/runXXX/files` to another location and following the `Saved model`_ instructions. These files can also be downloaded from the wandb dashboard.
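Once those three files are copied, loading the model goes through the same entry point as for any saved run. A minimal sketch, where `my_backup_dir` is a hypothetical directory containing the copied files:

.. code-block:: python

    from pathlib import Path

    # load the copied run directory like a regular pre-trained model
    model, train_stats, config = predict_rnn.get_pretrained_model(Path('my_backup_dir'))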

25 changes: 19 additions & 6 deletions grainlearning/rnn/evaluate_model.py
@@ -2,7 +2,7 @@
import random
import tensorflow as tf
from matplotlib import pyplot as plt

from sklearn.metrics import mean_absolute_error
from grainlearning.rnn import predict


@@ -29,8 +29,7 @@ def plot_predictions(model: tf.keras.Model, data: tf.data.Dataset, train_stats:
    plt.rcParams['axes.labelsize'] = 25
    plt.rcParams['font.family'] = 'sans-serif'

    predictions = predict.predict_macroscopics(model, data, train_stats, config,
                                               batch_size=batch_size)
    predictions = predict.predict_batch(model, data, train_stats, config, batch_size=batch_size)
    # extract tensors from dataset
    test_inputs, labels = next(iter(data.batch(batch_size)))

@@ -66,7 +65,7 @@ def _plot_sequence(i, j, y_key, i_s=0, x_key='steps', color='blue'):
    for i_s, color in zip(representative_idxs,
                          ['blue', 'green', 'purple', 'darkgreen', 'navy', 'yellowgreen']):

        p_label, e_label = _get_p_e_labels(config, test_inputs['contact_parameters'][i_s])
        p_label, e_label = _get_p_e_labels(config, test_inputs['params'][i_s])

        _plot_sequence(0, 0, 'e', i_s=i_s, color=color)
        _plot_sequence(0, 1, 'f_0', i_s=i_s, color=color)
@@ -130,7 +129,7 @@ def _find_representatives(input_data, add_e0: bool, add_pressure: bool, add_expe
    global P_INDEX, E_INDEX

    representatives = []
    contact_params = input_data['contact_parameters']
    contact_params = input_data['params']
    if add_e0:
        P_INDEX -= 1
        E_INDEX -= 1
@@ -151,7 +150,7 @@


def _find_random_samples(input_data, num_samples):
    return np.random.choice(len(input_data['contact_parameters']), num_samples, replace=False)
    return np.random.choice(len(input_data['params']), num_samples, replace=False)

def _checks_extra_contact_params(config: dict):
"""
@@ -175,3 +174,17 @@ def _get_p_e_labels(config: dict, contact_params):
    if add_experiment_type: e_label = 'drained' if contact_params[E_INDEX]==1 else 'undrained'
    else: e_label = config['experiment_type']
    return p_label, e_label


def plot_metric_distribution(data, predictions, config):
    """
    Plot a histogram of the per-sequence mean absolute errors (MAE)
    between the labels and the predictions.
    """
    # extract all labels from the dataset in a single batch
    test_inputs, labels = next(iter(data.batch(len(data))))
    # one MAE score per sequence; the first window_size steps of the labels are
    # skipped because they are consumed as input history by the model
    scores = [mean_absolute_error(labels[i, config['window_size']:, :].numpy(), predictions.numpy()[i, :, :])
              for i in range(labels.numpy().shape[0])]
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel('Mean Absolute Error')
    ax.set_ylabel('Frequency')
    ax.hist(scores)
    return fig
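
# Example usage (a sketch): `data` is the tf.data.Dataset the predictions were made
# from and `predictions` is the tensor returned by predict.predict_batch.
#   fig = plot_metric_distribution(data, predictions, config)
#   fig.savefig('mae_histogram.png')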
42 changes: 33 additions & 9 deletions grainlearning/rnn/models.py
@@ -17,11 +17,11 @@ def rnn_model(
"""
Neural network with an LSTM layer.

Takes in a load sequence and contact parameters, and outputs the macroscopic responses.
The contact parameters are used to initialize the hidden state of the LSTM.
Takes in an input sequence and the parameters and produces an output sequence.
The parameters are used to initialize the hidden state of the LSTM.

:param input_shapes: Dictionary containing `'num_load_features'`, `'num_contact_params'`,
`'num_labels'`. It can contain other keys but hese are the ones used here.
:param input_shapes: Dictionary containing `'num_input_features'`, `'num_params'`,
`'num_labels'`. It can contain other keys but these are the ones used here.
:param window_size: Length of time window.
:param lstm_units: Number of units of the hidden state of the LSTM.
:param dense_units: Number of units used in the dense layer after the LSTM.
@@ -34,12 +34,12 @@

    sequence_length = window_size
    load_sequence = layers.Input(
        shape=(sequence_length, input_shapes['num_load_features']), name='load_sequence')
    contact_params = layers.Input(shape=(input_shapes['num_contact_params'],), name='contact_parameters')
        shape=(sequence_length, input_shapes['num_input_features']), name='inputs')
    params = layers.Input(shape=(input_shapes['num_params'],), name='params')

    # compute the hidden state of the LSTM from the parameters
    state_h = layers.Dense(lstm_units, activation='tanh', name='state_h')(contact_params)
    state_c = layers.Dense(lstm_units, activation='tanh', name='state_c')(contact_params)
    state_h = layers.Dense(lstm_units, activation='tanh', name='state_h')(params)
    state_c = layers.Dense(lstm_units, activation='tanh', name='state_c')(params)
    initial_state = [state_h, state_c]

    X = load_sequence
@@ -49,6 +49,30 @@
    X = layers.Dense(dense_units, activation='relu')(X)
    outputs = layers.Dense(input_shapes['num_labels'])(X)

    model = Model(inputs=[load_sequence, contact_params], outputs=outputs)
    model = Model(inputs=[load_sequence, params], outputs=outputs)

    return model
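
# Example forward pass (a sketch; all shape values below are illustrative assumptions):
#   import numpy as np
#   model = rnn_model({'num_input_features': 3, 'num_params': 6, 'num_labels': 4},
#                     window_size=20)
#   y = model.predict({'inputs': np.zeros((1, 20, 3)), 'params': np.zeros((1, 6))})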


def rnn_model_for_triax(
        input_shapes: dict,
        window_size: int = 20,
        lstm_units: int = 50,
        dense_units: int = 20,
        seed: int = 42,
        **_,
):
    """
    A wrapper of the neural network model with an LSTM layer, for triaxial loading conditions.

    :param input_shapes: Dictionary containing `'num_load_features'`, `'num_contact_params'`, and `'num_labels'`.
    :param window_size: Length of time window.
    :param lstm_units: Number of units of the hidden state of the LSTM.
    :param dense_units: Number of units used in the dense layer after the LSTM.
    :param seed: The random seed used to initialize the weights.

    :return: A Keras model.
    """
    # rename the triaxial-specific keys to the generic ones expected by rnn_model
    input_shapes['num_input_features'] = input_shapes.pop('num_load_features')
    input_shapes['num_params'] = input_shapes.pop('num_contact_params')
    return rnn_model(input_shapes, window_size, lstm_units, dense_units, seed)
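
# Example usage (a sketch; the shape values are illustrative assumptions):
#   model = rnn_model_for_triax(
#       {'num_load_features': 3, 'num_contact_params': 6, 'num_labels': 4},
#       window_size=20,
#   )
#   model.summary()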