diff --git a/benchmark_code/downstream_classification.py b/benchmark_code/downstream_classification.py
index d91669d..c6ea2c4 100644
--- a/benchmark_code/downstream_classification.py
+++ b/benchmark_code/downstream_classification.py
@@ -24,6 +24,23 @@
 from global_config import RANDOM_SEEDS
 
 
+def calc_multiclass_classification_metrics(proba_predictions, y_true, n_classes):
+    # compute one-vs-rest PR-AUC and ROC-AUC weighted by class support
+    weighted_roc_auc = 0
+    weighted_pr_auc = 0
+    for i in range(n_classes):
+        result_metrics = calc_binary_classification_metrics(
+            proba_predictions[:, i], y_true == i
+        )
+        pr_auc = result_metrics["pr_auc"]
+        roc_auc = result_metrics["roc_auc"]
+        weighted_pr_auc += pr_auc * (y_true == i).sum()
+        weighted_roc_auc += roc_auc * (y_true == i).sum()
+    weighted_roc_auc /= len(y_true)
+    weighted_pr_auc /= len(y_true)
+    return weighted_pr_auc, weighted_roc_auc
+
+
 class LoadImputedDataAndLabel(Dataset):
     def __init__(self, imputed_data, labels):
         self.imputed_data = imputed_data
         self.labels = labels
@@ -269,11 +286,13 @@ def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=1
                 classification_metrics["roc_auc"],
             )
         else:
-            pr_auc, roc_auc = None, None
+            pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                proba_predictions, test_y, args.n_classes
+            )
         xgb_wo_pr_auc_collector.append(pr_auc)
         xgb_wo_roc_auc_collector.append(roc_auc)
 
-        # XGBoost model without imputation
+        # XGBoost model with imputation
         xgb = XGBClassifier()
         xgb.fit(
             train_X.reshape(-1, n_flatten_features),
@@ -291,7 +310,9 @@ def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=1
                 classification_metrics["roc_auc"],
             )
         else:
-            pr_auc, roc_auc = None, None
+            pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                proba_predictions, test_y, args.n_classes
+            )
         xgb_pr_auc_collector.append(pr_auc)
         xgb_roc_auc_collector.append(roc_auc)
 
@@ -312,7 +333,9 @@ def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=1
                 classification_metrics["roc_auc"],
            )
         else:
-            pr_auc, roc_auc = None, None
+            pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                proba_predictions, test_y, args.n_classes
+            )
         rnn_pr_auc_collector.append(pr_auc)
         rnn_roc_auc_collector.append(roc_auc)
 
@@ -339,7 +362,9 @@ def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=1
                 classification_metrics["roc_auc"],
             )
         else:
-            pr_auc, roc_auc = None, None
+            pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                proba_predictions, test_y, args.n_classes
+            )
         transformer_pr_auc_collector.append(pr_auc)
         transformer_roc_auc_collector.append(roc_auc)
 
diff --git a/benchmark_code/downstream_classification_naive.py b/benchmark_code/downstream_classification_naive.py
new file mode 100644
index 0000000..31f3e0a
--- /dev/null
+++ b/benchmark_code/downstream_classification_naive.py
@@ -0,0 +1,332 @@
+"""
+Downstream classification with XGBoost, RNN, and Transformer classifiers on datasets imputed by naive methods (mean, median, LOCF, linear interpolation).
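+
+Multiclass PR-AUC and ROC-AUC are reported as one-vs-rest scores weighted by
+class support, via calc_multiclass_classification_metrics defined below.
+
+A hypothetical invocation (the fold path value is an assumption; the flags are
+the ones this script defines):
+
+    python benchmark_code/downstream_classification_naive.py \
+        --device cuda:0 \
+        --dataset Pedestrian \
+        --dataset_fold_path data/pedestrian \
+        --n_classes 10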
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +import argparse +import os + +import h5py +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from pypots.data.saving import load_dict_from_h5 +from pypots.nn.modules.transformer import TransformerEncoder, PositionalEncoding +from pypots.utils.logging import logger +from pypots.utils.metrics import calc_binary_classification_metrics +from pypots.utils.random import set_random_seed +from torch.utils.data import Dataset, DataLoader +from xgboost import XGBClassifier + +from global_config import RANDOM_SEEDS + + +def calc_multiclass_classification_metrics(proba_predictions, y_true, n_classes): + # calc weighted roc auc + weighted_roc_auc = 0 + weighted_pr_auc = 0 + for i in range(n_classes): + result_metrics = calc_binary_classification_metrics( + proba_predictions[:, i], y_true == i + ) + pr_auc = result_metrics["pr_auc"] + roc_auc = result_metrics["roc_auc"] + weighted_pr_auc += pr_auc * (y_true == i).sum() + weighted_roc_auc += roc_auc * (y_true == i).sum() + weighted_roc_auc /= len(y_true) + weighted_pr_auc /= len(y_true) + return weighted_pr_auc, weighted_roc_auc + + +class LoadImputedDataAndLabel(Dataset): + def __init__(self, imputed_data, labels): + self.imputed_data = imputed_data + self.labels = labels + + def __len__(self): + return len(self.labels) + + def __getitem__(self, idx): + return ( + torch.from_numpy(self.imputed_data[idx]).to(torch.float32), + torch.tensor(self.labels[idx]).to(torch.long), + ) + + +class SimpleRNNClassification(torch.nn.Module): + def __init__(self, n_features, rnn_hidden_size, n_classes): + super().__init__() + self.rnn = torch.nn.LSTM( + n_features, + hidden_size=rnn_hidden_size, + batch_first=True, + ) + self.fcn = torch.nn.Linear(rnn_hidden_size, n_classes) + + def forward(self, data): + hidden_states, _ = self.rnn(data) + logits = self.fcn(hidden_states[:, -1, :]) + prediction_probabilities = torch.sigmoid(logits) + return prediction_probabilities + + +class TransformerClassification(torch.nn.Module): + def __init__( + self, + n_steps, + n_features, + n_layers, + d_model, + n_heads, + d_ffn, + dropout, + attn_dropout, + n_classes, + ): + super().__init__() + self.embedding = nn.Linear(n_features, d_model) + self.pos_encoding = PositionalEncoding(d_model) + self.transformer_encoder = TransformerEncoder( + n_layers=n_layers, + d_model=d_model, + n_heads=n_heads, + d_k=int(d_model / n_heads), + d_v=int(d_model / n_heads), + d_ffn=d_ffn, + dropout=dropout, + attn_dropout=attn_dropout, + ) + self.output = nn.Linear(d_model * n_steps, n_classes) + + def forward(self, data): + bz = data.shape[0] + embedding = self.pos_encoding(self.embedding(data)) + encoding, _ = self.transformer_encoder(embedding) + encoding = encoding.reshape(bz, -1) + logits = self.output(encoding) + prediction_probabilities = torch.sigmoid(logits) + return prediction_probabilities + + +def train(model, train_dataloader, val_dataloader): + n_epochs = 100 + patience = 10 + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + current_patience = patience + best_loss = float("inf") + for epoch in range(n_epochs): + model.train() + for idx, data in enumerate(train_dataloader): + X, y = map(lambda x: x.to(args.device), data) + optimizer.zero_grad() + probabilities = model(X) + loss = F.cross_entropy(probabilities, y.reshape(-1)) + loss.backward() + optimizer.step() + + model.eval() + loss_collector = [] + with torch.no_grad(): + for idx, data in enumerate(val_dataloader): + X, y = map(lambda 
x: x.to(args.device), data) + probabilities = model(X) + loss = F.cross_entropy(probabilities, y.reshape(-1)) + loss_collector.append(loss.item()) + + loss = np.asarray(loss_collector).mean() + if best_loss > loss: + current_patience = patience + best_loss = loss + best_model = model.state_dict() + else: + current_patience -= 1 + + if current_patience == 0: + break + + model.load_state_dict(best_model) + model.eval() + + probability_collector = [] + for idx, data in enumerate(test_loader): + X, y = map(lambda x: x.to(args.device), data) + probabilities = model.forward(X) + probability_collector += probabilities.cpu().tolist() + + probability_collector = np.asarray(probability_collector) + return probability_collector + + +def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=128): + train_set = LoadImputedDataAndLabel(train_X, train_y) + val_set = LoadImputedDataAndLabel(val_X, val_y) + test_set = LoadImputedDataAndLabel(test_X, test_y) + train_loader = DataLoader(train_set, batch_size, shuffle=True) + val_loader = DataLoader(val_set, batch_size, shuffle=False) + test_loader = DataLoader(test_set, batch_size, shuffle=False) + return train_loader, val_loader, test_loader + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--device", + type=str, + help="device to run the model, e.g. cuda:0", + required=True, + ) + parser.add_argument( + "--dataset", + type=str, + help="the dataset name", + required=True, + ) + parser.add_argument( + "--dataset_fold_path", + type=str, + help="the dataset fold path, where should include 3 H5 files train.h5, val.h5 and test.h5", + required=True, + ) + parser.add_argument( + "--n_classes", + type=int, + help="the number of classes", + required=True, + ) + args = parser.parse_args() + + train_set_path = os.path.join(args.dataset_fold_path, "train.h5") + val_set_path = os.path.join(args.dataset_fold_path, "val.h5") + test_set_path = os.path.join(args.dataset_fold_path, "test.h5") + with h5py.File(train_set_path, "r") as hf: + pots_train_X = hf["X"][:] + train_y = hf["y"][:] + with h5py.File(val_set_path, "r") as hf: + pots_val_X = hf["X"][:] + val_y = hf["y"][:] + with h5py.File(test_set_path, "r") as hf: + pots_test_X = hf["X"][:] + test_y = hf["y"][:] + + if args.dataset == "Pedestrian": + # Pedestrian dataset has 10 classes with label from 1 to 10, we need to convert it to 0 to 9 + train_y, val_y, test_y = train_y - 1, val_y - 1, test_y - 1 + + xgb_wo_pr_auc_collector = [] + xgb_wo_roc_auc_collector = [] + xgb_pr_auc_collector = [] + xgb_roc_auc_collector = [] + rnn_pr_auc_collector = [] + rnn_roc_auc_collector = [] + transformer_pr_auc_collector = [] + transformer_roc_auc_collector = [] + + imputed_data_path = os.path.join( + args.dataset_fold_path, + f"naive_imputation.h5", + ) + imputed_data = load_dict_from_h5(imputed_data_path) + for naive_method in ["mean", "median", "locf", "linear_interpolation"]: + for n_round in range(5): + train_X, val_X, test_X = ( + imputed_data["train"][naive_method], + imputed_data["val"][naive_method], + imputed_data["test"][naive_method], + ) + + train_loader, val_loader, test_loader = get_dataloaders( + train_X, train_y, val_X, val_y, test_X, test_y + ) + set_random_seed(RANDOM_SEEDS[n_round]) + n_flatten_features = np.product(train_X.shape[1:]) + + # XGBoost model with imputation + xgb = XGBClassifier() + xgb.fit( + train_X.reshape(-1, n_flatten_features), + train_y, + eval_set=[(val_X.reshape(-1, n_flatten_features), val_y)], + verbose=False, + ) + 
proba_predictions = xgb.predict_proba(
+                test_X.reshape(-1, n_flatten_features)
+            )
+            if args.n_classes == 2:
+                classification_metrics = calc_binary_classification_metrics(
+                    proba_predictions, test_y
+                )
+                pr_auc, roc_auc = (
+                    classification_metrics["pr_auc"],
+                    classification_metrics["roc_auc"],
+                )
+            else:
+                pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                    proba_predictions, test_y, args.n_classes
+                )
+            xgb_pr_auc_collector.append(pr_auc)
+            xgb_roc_auc_collector.append(roc_auc)
+
+            # RNN model
+            simple_rnn_classifier = SimpleRNNClassification(
+                n_features=train_X.shape[-1],
+                rnn_hidden_size=128,
+                n_classes=args.n_classes,
+            )
+            simple_rnn_classifier = simple_rnn_classifier.to(args.device)
+            proba_predictions = train(simple_rnn_classifier, train_loader, val_loader)
+            if args.n_classes == 2:
+                classification_metrics = calc_binary_classification_metrics(
+                    proba_predictions, test_y
+                )
+                pr_auc, roc_auc = (
+                    classification_metrics["pr_auc"],
+                    classification_metrics["roc_auc"],
+                )
+            else:
+                pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                    proba_predictions, test_y, args.n_classes
+                )
+            rnn_pr_auc_collector.append(pr_auc)
+            rnn_roc_auc_collector.append(roc_auc)
+
+            # Transformer model
+            transformer_classifier = TransformerClassification(
+                n_steps=train_X.shape[1],
+                n_features=train_X.shape[2],
+                n_layers=1,
+                d_model=64,
+                n_heads=2,
+                d_ffn=128,
+                dropout=0.1,
+                attn_dropout=0,
+                n_classes=args.n_classes,
+            )
+            transformer_classifier = transformer_classifier.to(args.device)
+            proba_predictions = train(transformer_classifier, train_loader, val_loader)
+            if args.n_classes == 2:
+                classification_metrics = calc_binary_classification_metrics(
+                    proba_predictions, test_y
+                )
+                pr_auc, roc_auc = (
+                    classification_metrics["pr_auc"],
+                    classification_metrics["roc_auc"],
+                )
+            else:
+                pr_auc, roc_auc = calc_multiclass_classification_metrics(
+                    proba_predictions, test_y, args.n_classes
+                )
+            transformer_pr_auc_collector.append(pr_auc)
+            transformer_roc_auc_collector.append(roc_auc)
+
+        logger.info(
+            "\n"
+            f"XGB with {naive_method} imputation PR_AUC: {np.mean(xgb_pr_auc_collector):.4f}±{np.std(xgb_pr_auc_collector):.4f}, "
+            f"ROC_AUC: {np.mean(xgb_roc_auc_collector):.4f}±{np.std(xgb_roc_auc_collector):.4f}\n"
+            f"RNN with {naive_method} imputation PR_AUC: {np.mean(rnn_pr_auc_collector):.4f}±{np.std(rnn_pr_auc_collector):.4f}, "
+            f"ROC_AUC: {np.mean(rnn_roc_auc_collector):.4f}±{np.std(rnn_roc_auc_collector):.4f}\n"
+            f"Transformer with {naive_method} imputation PR_AUC: {np.mean(transformer_pr_auc_collector):.4f}±{np.std(transformer_pr_auc_collector):.4f}, "
+            f"ROC_AUC: {np.mean(transformer_roc_auc_collector):.4f}±{np.std(transformer_roc_auc_collector):.4f}\n"
+        )
diff --git a/benchmark_code/downstream_forecasting.py b/benchmark_code/downstream_forecasting.py
new file mode 100644
index 0000000..ee2328e
--- /dev/null
+++ b/benchmark_code/downstream_forecasting.py
@@ -0,0 +1,433 @@
+"""
+Downstream forecasting with XGBoost, RNN, and Transformer models on datasets imputed by the benchmarked imputation models.
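+
+Imputations saved by the 5 training rounds of the chosen model are averaged
+before the downstream models are fitted, and the last feature is forecast
+5 steps ahead (n_forecasting_steps = 5).
+
+A hypothetical invocation (the model name, dataset name, and paths are
+assumptions; the flags are the ones this script defines):
+
+    python benchmark_code/downstream_forecasting.py \
+        --device cuda:0 \
+        --model SAITS \
+        --dataset ETTh1 \
+        --dataset_fold_path data/etth1 \
+        --model_result_parent_fold results/etth1/SAITS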
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +import argparse +import os + +import h5py +import numpy as np +import torch +import torch.nn as nn +from pypots.data.saving import pickle_load +from pypots.nn.modules.transformer import ( + TransformerEncoder, + TransformerDecoder, + PositionalEncoding, +) +from pypots.utils.logging import logger +from pypots.utils.metrics import calc_mae, calc_mse, calc_mre +from pypots.utils.random import set_random_seed +from torch.utils.data import Dataset, DataLoader +from xgboost import XGBRegressor + +from global_config import RANDOM_SEEDS + + +class LoadImputedData(Dataset): + def __init__(self, imputed_input, target): + self.imputed_input = imputed_input + self.target = target + + def __len__(self): + return len(self.imputed_input) + + def __getitem__(self, idx): + return ( + torch.from_numpy(self.imputed_input[idx]).to(torch.float32), + torch.from_numpy(self.target[idx]).to(torch.float32), + ) + + +class SimpleRNNForecaster(torch.nn.Module): + def __init__( + self, n_features, n_steps, rnn_hidden_size, n_out_features, n_out_steps + ): + super().__init__() + self.n_steps = n_steps + self.n_out_steps = n_out_steps + self.rnn_hidden_size = rnn_hidden_size + self.rnn = torch.nn.LSTM( + n_features, + hidden_size=rnn_hidden_size, + batch_first=True, + ) + self.fcn_out = torch.nn.Linear(rnn_hidden_size, n_out_features) + + def forward(self, data): + estimations = [] + for i in range(self.n_out_steps): + X = data[:, i : i + self.n_steps] + hidden_states, _ = self.rnn(X) + estimation = self.fcn_out(hidden_states[:, -1]) + estimations.append(estimation) + + output = torch.stack(estimations, dim=1) + return output + + +class TransformerForecaster(torch.nn.Module): + def __init__( + self, + n_features, + n_steps, + n_out_features, + n_out_steps, + n_layers, + d_model, + n_heads, + d_ffn, + dropout, + attn_dropout, + ): + super().__init__() + self.n_out_steps = n_out_steps + self.encoder_embedding = nn.Linear(n_features, d_model) + self.pos_encoding = PositionalEncoding(d_model) + self.transformer_encoder = TransformerEncoder( + n_layers=n_layers, + d_model=d_model, + n_heads=n_heads, + d_k=int(d_model / n_heads), + d_v=int(d_model / n_heads), + d_ffn=d_ffn, + dropout=dropout, + attn_dropout=attn_dropout, + ) + self.transformer_decoder = TransformerDecoder( + n_steps=n_steps, + n_features=n_out_features, + n_layers=n_layers, + d_model=d_model, + n_heads=n_heads, + d_k=int(d_model / n_heads), + d_v=int(d_model / n_heads), + d_ffn=d_ffn, + dropout=dropout, + attn_dropout=attn_dropout, + ) + self.output = nn.Linear(d_model, n_out_features) + + def forward(self, X, forecasting_X): + embedding = self.pos_encoding(self.encoder_embedding(X)) + encoding, _ = self.transformer_encoder(embedding) + decoding = self.transformer_decoder(forecasting_X, encoding) + output = self.output(decoding) + return output[:, -self.n_out_steps :] + + +def train(model, train_dataloader, val_dataloader, test_dataloader): + n_epochs = 100 + patience = 10 + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + current_patience = patience + best_loss = float("inf") + for epoch in range(n_epochs): + model.train() + for idx, data in enumerate(train_dataloader): + X, y = map(lambda x: x.to(args.device), data) + optimizer.zero_grad() + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + loss = calc_mse(predictions, y[:, -n_forecasting_steps:]) + loss.backward() + optimizer.step() + + model.eval() + loss_collector = [] + with torch.no_grad(): + 
for idx, data in enumerate(val_dataloader): + X, y = map(lambda x: x.to(args.device), data) + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + loss = calc_mse(predictions, y[:, -n_forecasting_steps:]) + loss_collector.append(loss.item()) + + loss = np.asarray(loss_collector).mean() + if best_loss > loss: + current_patience = patience + best_loss = loss + best_model = model.state_dict() + else: + current_patience -= 1 + + if current_patience == 0: + break + + model.load_state_dict(best_model) + model.eval() + + prediction_collector = [] + for idx, data in enumerate(test_dataloader): + X, y = map(lambda x: x.to(args.device), data) + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + prediction_collector += predictions.cpu().tolist() + + prediction_collector = np.asarray(prediction_collector) + return prediction_collector + + +def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=128): + train_set = LoadImputedData(train_X, train_y) + val_set = LoadImputedData(val_X, val_y) + test_set = LoadImputedData(test_X, test_y) + train_loader = DataLoader(train_set, batch_size, shuffle=True) + val_loader = DataLoader(val_set, batch_size, shuffle=False) + test_loader = DataLoader(test_set, batch_size, shuffle=False) + return train_loader, val_loader, test_loader + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--device", + type=str, + help="device to run the model, e.g. cuda:0", + required=True, + ) + parser.add_argument( + "--model", + type=str, + help="the model name", + required=True, + ) + parser.add_argument( + "--dataset", + type=str, + help="the dataset name", + required=True, + ) + parser.add_argument( + "--dataset_fold_path", + type=str, + help="the dataset fold path, where should include 3 H5 files train.h5, val.h5 and test.h5", + required=True, + ) + parser.add_argument( + "--model_result_parent_fold", + type=str, + help="the parent fold of the model results, where should include the folds of 5 rounds", + required=True, + ) + args = parser.parse_args() + + n_forecasting_steps = 5 # forecasting 5 steps ahead + + train_set_path = os.path.join(args.dataset_fold_path, "train.h5") + val_set_path = os.path.join(args.dataset_fold_path, "val.h5") + test_set_path = os.path.join(args.dataset_fold_path, "test.h5") + with h5py.File(train_set_path, "r") as hf: + pots_train_X = hf["X"][:] + ori_train_X = hf["X_ori"][:] + with h5py.File(val_set_path, "r") as hf: + pots_val_X = hf["X"][:] + ori_val_X = hf["X_ori"][:] + with h5py.File(test_set_path, "r") as hf: + pots_test_X = hf["X"][:] + ori_test_X = hf["X_ori"][:] + + train_X_collector = [] + val_X_collector = [] + test_X_collector = [] + + for n_round in range(5): + imputed_data_path = os.path.join( + args.model_result_parent_fold, + f"round_{n_round}/imputation.pkl", + ) + imputed_data = pickle_load(imputed_data_path) + _train_X, _val_X, _test_X = ( + imputed_data["train_set_imputation"], + imputed_data["val_set_imputation"], + imputed_data["test_set_imputation"], + ) + train_X_collector.append(_train_X) + val_X_collector.append(_val_X) + test_X_collector.append(_test_X) + train_X, val_X, test_X = ( + np.mean(np.stack(train_X_collector), axis=0), + np.mean(np.stack(val_X_collector), axis=0), + np.mean(np.stack(test_X_collector), axis=0), + ) + + xgb_wo_metrics_collector = {"mae": [], "mse": [], "mre": []} + xgb_metrics_collector = {"mae": [], "mse": [], "mre": []} + rnn_metrics_collector = {"mae": 
[], "mse": [], "mre": []} + transformer_metrics_collector = {"mae": [], "mse": [], "mre": []} + + train_y, val_y, test_y = ( + ori_train_X[:, :, -1:], + ori_val_X[:, :, -1:], + ori_test_X[:, :, -1:], + ) + train_loader, val_loader, test_loader = get_dataloaders( + train_X[:, :, :-1], + train_y, + val_X[:, :, :-1], + val_y, + test_X[:, :, :-1], + test_y, + ) + + trans_train_X, trans_train_y = np.copy(train_X), np.copy(train_y) + trans_val_X, trans_val_y = np.copy(val_X), np.copy(val_y) + trans_test_X, trans_test_y = np.copy(test_X), np.copy(test_y) + trans_val_X[:, -n_forecasting_steps:] = 0 + trans_val_y[:, :-n_forecasting_steps] = 0 + trans_test_X[:, -n_forecasting_steps:] = 0 + trans_test_y[:, :-n_forecasting_steps] = 0 + trans_train_loader, trans_val_loader, trans_test_loader = get_dataloaders( + trans_train_X[:, :, :-1], + trans_train_y, + trans_val_X[:, :, :-1], + trans_val_y, + trans_test_X[:, :, :-1], + trans_test_y, + ) + for n_round in range(5): + set_random_seed(RANDOM_SEEDS[n_round]) + _, n_steps, n_features = train_X[:, :-n_forecasting_steps, :-1].shape + n_in_flatten_features = n_steps * n_features + n_out_flatten_features = np.product(train_y[:, -n_forecasting_steps:].shape[1:]) + + # XGBoost model without imputation + xgb = XGBRegressor() + xgb.fit( + pots_train_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ), + train_y[:, -n_forecasting_steps:].reshape(-1, n_out_flatten_features), + eval_set=[ + ( + pots_val_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ), + val_y[:, -n_forecasting_steps:].reshape(-1, n_out_flatten_features), + ) + ], + verbose=False, + ) + predictions = xgb.predict( + pots_test_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ) + ) + predictions = predictions.reshape(-1, n_forecasting_steps, 1) + xgb_wo_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_wo_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_wo_metrics_collector["mre"].append( + calc_mre(predictions, test_y[:, -n_forecasting_steps:]) + ) + + # XGBoost model with imputation + xgb = XGBRegressor() + xgb.fit( + train_X[:, :-n_forecasting_steps, :-1].reshape(-1, n_in_flatten_features), + train_y[:, -n_forecasting_steps:].reshape(-1, n_out_flatten_features), + eval_set=[ + ( + val_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ), + val_y[:, -n_forecasting_steps:].reshape(-1, n_out_flatten_features), + ) + ], + verbose=False, + ) + predictions = xgb.predict( + test_X[:, :-n_forecasting_steps, :-1].reshape(-1, n_in_flatten_features) + ) + predictions = predictions.reshape(-1, n_forecasting_steps, 1) + xgb_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_metrics_collector["mre"].append( + calc_mre(predictions, test_y[:, -n_forecasting_steps:]) + ) + + # RNN model + simple_rnn_regressor = SimpleRNNForecaster( + n_features=n_features, + n_steps=n_steps, + rnn_hidden_size=128, + n_out_features=1, + n_out_steps=n_forecasting_steps, + ) + simple_rnn_regressor = simple_rnn_regressor.to(args.device) + predictions = train(simple_rnn_regressor, train_loader, val_loader, test_loader) + rnn_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + rnn_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, 
-n_forecasting_steps:])
+        )
+        rnn_metrics_collector["mre"].append(
+            calc_mre(predictions, test_y[:, -n_forecasting_steps:])
+        )
+
+        # Transformer model
+        transformer_forecaster = TransformerForecaster(
+            n_features=n_features,
+            n_steps=n_steps + n_forecasting_steps,
+            n_out_features=1,
+            n_out_steps=n_forecasting_steps,
+            n_layers=1,
+            d_model=64,
+            n_heads=2,
+            d_ffn=128,
+            dropout=0.1,
+            attn_dropout=0,
+        )
+        transformer_forecaster = transformer_forecaster.to(args.device)
+        predictions = train(
+            transformer_forecaster,
+            trans_train_loader,
+            trans_val_loader,
+            trans_test_loader,
+        )
+        transformer_metrics_collector["mae"].append(
+            calc_mae(predictions, test_y[:, -n_forecasting_steps:])
+        )
+        transformer_metrics_collector["mse"].append(
+            calc_mse(predictions, test_y[:, -n_forecasting_steps:])
+        )
+        transformer_metrics_collector["mre"].append(
+            calc_mre(predictions, test_y[:, -n_forecasting_steps:])
+        )
+
+    logger.info(
+        "\n"
+        f"XGB (without imputation) regression "
+        f"MAE: {np.mean(xgb_wo_metrics_collector['mae']):.4f}±{np.std(xgb_wo_metrics_collector['mae']):.4f}, "
+        f"MSE: {np.mean(xgb_wo_metrics_collector['mse']):.4f}±{np.std(xgb_wo_metrics_collector['mse']):.4f}, "
+        f"MRE: {np.mean(xgb_wo_metrics_collector['mre']):.4f}±{np.std(xgb_wo_metrics_collector['mre']):.4f}\n"
+        f"XGB (with {args.model} imputation) regression "
+        f"MAE: {np.mean(xgb_metrics_collector['mae']):.4f}±{np.std(xgb_metrics_collector['mae']):.4f}, "
+        f"MSE: {np.mean(xgb_metrics_collector['mse']):.4f}±{np.std(xgb_metrics_collector['mse']):.4f}, "
+        f"MRE: {np.mean(xgb_metrics_collector['mre']):.4f}±{np.std(xgb_metrics_collector['mre']):.4f}\n"
+        f"RNN (with {args.model} imputation) regression "
+        f"MAE: {np.mean(rnn_metrics_collector['mae']):.4f}±{np.std(rnn_metrics_collector['mae']):.4f}, "
+        f"MSE: {np.mean(rnn_metrics_collector['mse']):.4f}±{np.std(rnn_metrics_collector['mse']):.4f}, "
+        f"MRE: {np.mean(rnn_metrics_collector['mre']):.4f}±{np.std(rnn_metrics_collector['mre']):.4f}\n"
+        f"Transformer (with {args.model} imputation) regression "
+        f"MAE: {np.mean(transformer_metrics_collector['mae']):.4f}±{np.std(transformer_metrics_collector['mae']):.4f}, "
+        f"MSE: {np.mean(transformer_metrics_collector['mse']):.4f}±{np.std(transformer_metrics_collector['mse']):.4f}, "
+        f"MRE: {np.mean(transformer_metrics_collector['mre']):.4f}±{np.std(transformer_metrics_collector['mre']):.4f}\n"
+    )
diff --git a/benchmark_code/downstream_forecasting_naive.py b/benchmark_code/downstream_forecasting_naive.py
new file mode 100644
index 0000000..5e6b518
--- /dev/null
+++ b/benchmark_code/downstream_forecasting_naive.py
@@ -0,0 +1,381 @@
+"""
+Downstream forecasting with XGBoost, RNN, and Transformer models on datasets imputed by naive methods (mean, median, LOCF, linear interpolation).
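+
+Reads naive_imputation.h5 from the dataset fold and evaluates mean, median,
+LOCF, and linear-interpolation imputations with XGBoost, RNN, and Transformer
+forecasters over 5 seeded rounds each.
+
+A hypothetical invocation (the dataset name and fold path are assumptions;
+the flags are the ones this script defines):
+
+    python benchmark_code/downstream_forecasting_naive.py \
+        --device cuda:0 \
+        --dataset ETTh1 \
+        --dataset_fold_path data/etth1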
+""" + +# Created by Wenjie Du +# License: BSD-3-Clause + +import argparse +import os + +import h5py +import numpy as np +import torch +import torch.nn as nn +from pypots.data.saving import load_dict_from_h5 +from pypots.nn.modules.transformer import ( + TransformerEncoder, + TransformerDecoder, + PositionalEncoding, +) +from pypots.utils.logging import logger +from pypots.utils.metrics import calc_mae, calc_mse, calc_mre +from pypots.utils.random import set_random_seed +from torch.utils.data import Dataset, DataLoader +from xgboost import XGBRegressor + +from global_config import RANDOM_SEEDS + + +class LoadImputedData(Dataset): + def __init__(self, imputed_input, target): + self.imputed_input = imputed_input + self.target = target + + def __len__(self): + return len(self.imputed_input) + + def __getitem__(self, idx): + return ( + torch.from_numpy(self.imputed_input[idx]).to(torch.float32), + torch.from_numpy(self.target[idx]).to(torch.float32), + ) + + +class SimpleRNNForecaster(torch.nn.Module): + def __init__( + self, n_features, n_steps, rnn_hidden_size, n_out_features, n_out_steps + ): + super().__init__() + self.n_steps = n_steps + self.n_out_steps = n_out_steps + self.rnn_hidden_size = rnn_hidden_size + self.rnn = torch.nn.LSTM( + n_features, + hidden_size=rnn_hidden_size, + batch_first=True, + ) + self.fcn_out = torch.nn.Linear(rnn_hidden_size, n_out_features) + + def forward(self, data): + estimations = [] + for i in range(self.n_out_steps): + X = data[:, i : i + self.n_steps] + hidden_states, _ = self.rnn(X) + estimation = self.fcn_out(hidden_states[:, -1]) + estimations.append(estimation) + + output = torch.stack(estimations, dim=1) + return output + + +class TransformerForecaster(torch.nn.Module): + def __init__( + self, + n_features, + n_steps, + n_out_features, + n_out_steps, + n_layers, + d_model, + n_heads, + d_ffn, + dropout, + attn_dropout, + ): + super().__init__() + self.n_out_steps = n_out_steps + self.encoder_embedding = nn.Linear(n_features, d_model) + self.pos_encoding = PositionalEncoding(d_model) + self.transformer_encoder = TransformerEncoder( + n_layers=n_layers, + d_model=d_model, + n_heads=n_heads, + d_k=int(d_model / n_heads), + d_v=int(d_model / n_heads), + d_ffn=d_ffn, + dropout=dropout, + attn_dropout=attn_dropout, + ) + self.transformer_decoder = TransformerDecoder( + n_steps=n_steps, + n_features=n_out_features, + n_layers=n_layers, + d_model=d_model, + n_heads=n_heads, + d_k=int(d_model / n_heads), + d_v=int(d_model / n_heads), + d_ffn=d_ffn, + dropout=dropout, + attn_dropout=attn_dropout, + ) + self.output = nn.Linear(d_model, n_out_features) + + def forward(self, X, forecasting_X): + embedding = self.pos_encoding(self.encoder_embedding(X)) + encoding, _ = self.transformer_encoder(embedding) + decoding = self.transformer_decoder(forecasting_X, encoding) + output = self.output(decoding) + return output[:, -self.n_out_steps :] + + +def train(model, train_dataloader, val_dataloader, test_dataloader): + n_epochs = 100 + patience = 10 + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + current_patience = patience + best_loss = float("inf") + for epoch in range(n_epochs): + model.train() + for idx, data in enumerate(train_dataloader): + X, y = map(lambda x: x.to(args.device), data) + optimizer.zero_grad() + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + loss = calc_mse(predictions, y[:, -n_forecasting_steps:]) + loss.backward() + optimizer.step() + + model.eval() + loss_collector = [] + with 
torch.no_grad(): + for idx, data in enumerate(val_dataloader): + X, y = map(lambda x: x.to(args.device), data) + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + loss = calc_mse(predictions, y[:, -n_forecasting_steps:]) + loss_collector.append(loss.item()) + + loss = np.asarray(loss_collector).mean() + if best_loss > loss: + current_patience = patience + best_loss = loss + best_model = model.state_dict() + else: + current_patience -= 1 + + if current_patience == 0: + break + + model.load_state_dict(best_model) + model.eval() + + prediction_collector = [] + for idx, data in enumerate(test_dataloader): + X, y = map(lambda x: x.to(args.device), data) + if "RNN" in model._get_name(): + predictions = model(X) + else: + predictions = model(X, y) + prediction_collector += predictions.cpu().tolist() + + prediction_collector = np.asarray(prediction_collector) + return prediction_collector + + +def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=128): + train_set = LoadImputedData(train_X, train_y) + val_set = LoadImputedData(val_X, val_y) + test_set = LoadImputedData(test_X, test_y) + train_loader = DataLoader(train_set, batch_size, shuffle=True) + val_loader = DataLoader(val_set, batch_size, shuffle=False) + test_loader = DataLoader(test_set, batch_size, shuffle=False) + return train_loader, val_loader, test_loader + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--device", + type=str, + help="device to run the model, e.g. cuda:0", + required=True, + ) + parser.add_argument( + "--dataset", + type=str, + help="the dataset name", + required=True, + ) + parser.add_argument( + "--dataset_fold_path", + type=str, + help="the dataset fold path, where should include 3 H5 files train.h5, val.h5 and test.h5", + required=True, + ) + args = parser.parse_args() + + n_forecasting_steps = 5 # forecasting 5 steps ahead + + train_set_path = os.path.join(args.dataset_fold_path, "train.h5") + val_set_path = os.path.join(args.dataset_fold_path, "val.h5") + test_set_path = os.path.join(args.dataset_fold_path, "test.h5") + with h5py.File(train_set_path, "r") as hf: + pots_train_X = hf["X"][:] + ori_train_X = hf["X_ori"][:] + with h5py.File(val_set_path, "r") as hf: + pots_val_X = hf["X"][:] + ori_val_X = hf["X_ori"][:] + with h5py.File(test_set_path, "r") as hf: + pots_test_X = hf["X"][:] + ori_test_X = hf["X_ori"][:] + + xgb_wo_metrics_collector = {"mae": [], "mse": [], "mre": []} + xgb_metrics_collector = {"mae": [], "mse": [], "mre": []} + rnn_metrics_collector = {"mae": [], "mse": [], "mre": []} + transformer_metrics_collector = {"mae": [], "mse": [], "mre": []} + + imputed_data_path = os.path.join( + args.dataset_fold_path, + f"naive_imputation.h5", + ) + imputed_data = load_dict_from_h5(imputed_data_path) + for naive_method in ["mean", "median", "locf", "linear_interpolation"]: + for n_round in range(5): + train_X, val_X, test_X = ( + imputed_data["train"][naive_method], + imputed_data["val"][naive_method], + imputed_data["test"][naive_method], + ) + + train_y, val_y, test_y = ( + ori_train_X[:, :, -1:], + ori_val_X[:, :, -1:], + ori_test_X[:, :, -1:], + ) + train_loader, val_loader, test_loader = get_dataloaders( + train_X[:, :, :-1], + train_y, + val_X[:, :, :-1], + val_y, + test_X[:, :, :-1], + test_y, + ) + + trans_train_X, trans_train_y = np.copy(train_X), np.copy(train_y) + trans_val_X, trans_val_y = np.copy(val_X), np.copy(val_y) + trans_test_X, trans_test_y = np.copy(test_X), 
np.copy(test_y) + trans_val_X[:, -n_forecasting_steps:] = 0 + trans_val_y[:, :-n_forecasting_steps] = 0 + trans_test_X[:, -n_forecasting_steps:] = 0 + trans_test_y[:, :-n_forecasting_steps] = 0 + trans_train_loader, trans_val_loader, trans_test_loader = get_dataloaders( + trans_train_X[:, :, :-1], + trans_train_y, + trans_val_X[:, :, :-1], + trans_val_y, + trans_test_X[:, :, :-1], + trans_test_y, + ) + + set_random_seed(RANDOM_SEEDS[n_round]) + _, n_steps, n_features = train_X[:, :-n_forecasting_steps, :-1].shape + n_in_flatten_features = n_steps * n_features + n_out_flatten_features = np.product( + train_y[:, -n_forecasting_steps:].shape[1:] + ) + + # XGBoost model with imputation + xgb = XGBRegressor() + xgb.fit( + train_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ), + train_y[:, -n_forecasting_steps:].reshape(-1, n_out_flatten_features), + eval_set=[ + ( + val_X[:, :-n_forecasting_steps, :-1].reshape( + -1, n_in_flatten_features + ), + val_y[:, -n_forecasting_steps:].reshape( + -1, n_out_flatten_features + ), + ) + ], + verbose=False, + ) + predictions = xgb.predict( + test_X[:, :-n_forecasting_steps, :-1].reshape(-1, n_in_flatten_features) + ) + predictions = predictions.reshape(-1, n_forecasting_steps, 1) + xgb_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, -n_forecasting_steps:]) + ) + xgb_metrics_collector["mre"].append( + calc_mre(predictions, test_y[:, -n_forecasting_steps:]) + ) + + # RNN model + simple_rnn_regressor = SimpleRNNForecaster( + n_features=n_features, + n_steps=n_steps, + rnn_hidden_size=128, + n_out_features=1, + n_out_steps=n_forecasting_steps, + ) + simple_rnn_regressor = simple_rnn_regressor.to(args.device) + predictions = train( + simple_rnn_regressor, train_loader, val_loader, test_loader + ) + rnn_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + rnn_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, -n_forecasting_steps:]) + ) + rnn_metrics_collector["mre"].append( + calc_mre(predictions, test_y[:, -n_forecasting_steps:]) + ) + + # Transformer model + transformer_forecaster = TransformerForecaster( + n_features=n_features, + n_steps=n_steps + n_forecasting_steps, + n_out_features=1, + n_out_steps=n_forecasting_steps, + n_layers=1, + d_model=64, + n_heads=2, + d_ffn=128, + dropout=0.1, + attn_dropout=0, + ) + transformer_forecaster = transformer_forecaster.to(args.device) + predictions = train( + transformer_forecaster, + trans_train_loader, + trans_val_loader, + trans_test_loader, + ) + transformer_metrics_collector["mae"].append( + calc_mae(predictions, test_y[:, -n_forecasting_steps:]) + ) + transformer_metrics_collector["mse"].append( + calc_mse(predictions, test_y[:, -n_forecasting_steps:]) + ) + transformer_metrics_collector["mre"].append( + calc_mre(predictions, test_y[:, -n_forecasting_steps:]) + ) + + logger.info( + "\n" + f"XGB (with {naive_method} imputation) regression " + f"MAE: {np.mean(xgb_metrics_collector['mae']):.4f}±{np.std(xgb_metrics_collector['mae']):.4f}, " + f"MSE: {np.mean(xgb_metrics_collector['mse']):.4f}±{np.std(xgb_metrics_collector['mse']):.4f}, " + f"MRE: {np.mean(xgb_metrics_collector['mre']):.4f}±{np.std(xgb_metrics_collector['mre']):.4f}\n" + f"RNN (with {naive_method} imputation) regression " + f"MAE: {np.mean(rnn_metrics_collector['mae']):.4f}±{np.std(rnn_metrics_collector['mae']):.4f}, " + f"MSE: 
{np.mean(rnn_metrics_collector['mse']):.4f}±{np.std(rnn_metrics_collector['mse']):.4f}, "
+            f"MRE: {np.mean(rnn_metrics_collector['mre']):.4f}±{np.std(rnn_metrics_collector['mre']):.4f}\n"
+            f"Transformer (with {naive_method} imputation) regression "
+            f"MAE: {np.mean(transformer_metrics_collector['mae']):.4f}±{np.std(transformer_metrics_collector['mae']):.4f}, "
+            f"MSE: {np.mean(transformer_metrics_collector['mse']):.4f}±{np.std(transformer_metrics_collector['mse']):.4f}, "
+            f"MRE: {np.mean(transformer_metrics_collector['mre']):.4f}±{np.std(transformer_metrics_collector['mre']):.4f}\n"
+        )
diff --git a/benchmark_code/downstream_regression.py b/benchmark_code/downstream_regression.py
index c12fc78..52145fc 100644
--- a/benchmark_code/downstream_regression.py
+++ b/benchmark_code/downstream_regression.py
@@ -184,8 +184,6 @@ def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=1
     )
     args = parser.parse_args()
 
-    n_forecasting_steps = 5
-
     train_set_path = os.path.join(args.dataset_fold_path, "train.h5")
     val_set_path = os.path.join(args.dataset_fold_path, "val.h5")
     test_set_path = os.path.join(args.dataset_fold_path, "test.h5")
diff --git a/benchmark_code/downstream_regression_naive.py b/benchmark_code/downstream_regression_naive.py
new file mode 100644
index 0000000..7a40d4f
--- /dev/null
+++ b/benchmark_code/downstream_regression_naive.py
@@ -0,0 +1,281 @@
+"""
+Downstream regression with XGBoost, RNN, and Transformer models on datasets imputed by naive methods (mean, median, LOCF, linear interpolation).
+"""
+
+# Created by Wenjie Du
+# License: BSD-3-Clause
+
+import argparse
+import os
+
+import h5py
+import numpy as np
+import torch
+import torch.nn as nn
+from pypots.data.saving import load_dict_from_h5
+from pypots.nn.modules.transformer import (
+    TransformerEncoder,
+    PositionalEncoding,
+)
+from pypots.utils.logging import logger
+from pypots.utils.metrics import calc_mae, calc_mse, calc_mre
+from pypots.utils.random import set_random_seed
+from torch.utils.data import Dataset, DataLoader
+from xgboost import XGBRegressor
+
+from global_config import RANDOM_SEEDS
+
+
+class LoadImputedData(Dataset):
+    def __init__(self, imputed_input, target):
+        self.imputed_input = imputed_input
+        self.target = target
+
+    def __len__(self):
+        return len(self.imputed_input)
+
+    def __getitem__(self, idx):
+        return (
+            torch.from_numpy(self.imputed_input[idx]).to(torch.float32),
+            torch.from_numpy(self.target[idx]).to(torch.float32),
+        )
+
+
+class SimpleRNNRegressor(torch.nn.Module):
+    def __init__(self, n_features, rnn_hidden_size, n_out_features):
+        super().__init__()
+        self.rnn = torch.nn.LSTM(
+            n_features,
+            hidden_size=rnn_hidden_size,
+            batch_first=True,
+        )
+        self.fcn = torch.nn.Linear(rnn_hidden_size, n_out_features)
+
+    def forward(self, data):
+        hidden_states, _ = self.rnn(data)
+        output = self.fcn(hidden_states)
+        return output
+
+
+class TransformerRegressor(torch.nn.Module):
+    def __init__(
+        self,
+        n_features,
+        n_out_features,
+        n_layers,
+        d_model,
+        n_heads,
+        d_ffn,
+        dropout,
+        attn_dropout,
+    ):
+        super().__init__()
+        self.embedding = nn.Linear(n_features, d_model)
+        self.pos_encoding = PositionalEncoding(d_model)
+        self.transformer_encoder = TransformerEncoder(
+            n_layers=n_layers,
+            d_model=d_model,
+            n_heads=n_heads,
+            d_k=int(d_model / n_heads),
+            d_v=int(d_model / n_heads),
+            d_ffn=d_ffn,
+            dropout=dropout,
+            attn_dropout=attn_dropout,
+        )
+        self.output = nn.Linear(d_model, n_out_features)
+
+    def forward(self, data):
+        embedding = self.pos_encoding(self.embedding(data))
+        encoding, _ = 
self.transformer_encoder(embedding) + output = self.output(encoding) + return output + + +def train(model, train_dataloader, val_dataloader): + n_epochs = 100 + patience = 10 + optimizer = torch.optim.Adam(model.parameters(), 1e-3) + current_patience = patience + best_loss = float("inf") + for epoch in range(n_epochs): + model.train() + for idx, data in enumerate(train_dataloader): + X, y = map(lambda x: x.to(args.device), data) + optimizer.zero_grad() + predictions = model(X) + loss = calc_mse(predictions, y) + loss.backward() + optimizer.step() + + model.eval() + loss_collector = [] + with torch.no_grad(): + for idx, data in enumerate(val_dataloader): + X, y = map(lambda x: x.to(args.device), data) + predictions = model(X) + loss = calc_mse(predictions, y) + loss_collector.append(loss.item()) + + loss = np.asarray(loss_collector).mean() + if best_loss > loss: + current_patience = patience + best_loss = loss + best_model = model.state_dict() + else: + current_patience -= 1 + + if current_patience == 0: + break + + model.load_state_dict(best_model) + model.eval() + + probability_collector = [] + for idx, data in enumerate(test_loader): + X, y = map(lambda x: x.to(args.device), data) + probabilities = model.forward(X) + probability_collector += probabilities.cpu().tolist() + + probability_collector = np.asarray(probability_collector) + return probability_collector + + +def get_dataloaders(train_X, train_y, val_X, val_y, test_X, test_y, batch_size=128): + train_set = LoadImputedData(train_X, train_y) + val_set = LoadImputedData(val_X, val_y) + test_set = LoadImputedData(test_X, test_y) + train_loader = DataLoader(train_set, batch_size, shuffle=True) + val_loader = DataLoader(val_set, batch_size, shuffle=False) + test_loader = DataLoader(test_set, batch_size, shuffle=False) + return train_loader, val_loader, test_loader + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--device", + type=str, + help="device to run the model, e.g. 
cuda:0", + required=True, + ) + parser.add_argument( + "--dataset", + type=str, + help="the dataset name", + required=True, + ) + parser.add_argument( + "--dataset_fold_path", + type=str, + help="the dataset fold path, where should include 3 H5 files train.h5, val.h5 and test.h5", + required=True, + ) + args = parser.parse_args() + + train_set_path = os.path.join(args.dataset_fold_path, "train.h5") + val_set_path = os.path.join(args.dataset_fold_path, "val.h5") + test_set_path = os.path.join(args.dataset_fold_path, "test.h5") + with h5py.File(train_set_path, "r") as hf: + pots_train_X = hf["X"][:, :, :-1] + ori_train_X = hf["X_ori"][:] + with h5py.File(val_set_path, "r") as hf: + pots_val_X = hf["X"][:, :, :-1] + ori_val_X = hf["X_ori"][:] + with h5py.File(test_set_path, "r") as hf: + pots_test_X = hf["X"][:, :, :-1] + ori_test_X = hf["X_ori"][:] + + xgb_wo_metrics_collector = {"mae": [], "mse": [], "mre": []} + xgb_metrics_collector = {"mae": [], "mse": [], "mre": []} + rnn_metrics_collector = {"mae": [], "mse": [], "mre": []} + transformer_metrics_collector = {"mae": [], "mse": [], "mre": []} + + imputed_data_path = os.path.join( + args.dataset_fold_path, + f"naive_imputation.h5", + ) + imputed_data = load_dict_from_h5(imputed_data_path) + for naive_method in ["mean", "median", "locf", "linear_interpolation"]: + for n_round in range(5): + train_X, val_X, test_X = ( + imputed_data["train"][naive_method][:, :, :-1], + imputed_data["val"][naive_method][:, :, :-1], + imputed_data["test"][naive_method][:, :, :-1], + ) + train_y, val_y, test_y = ( + ori_train_X[:, :, -1], + ori_val_X[:, :, -1], + ori_test_X[:, :, -1], + ) + train_loader, val_loader, test_loader = get_dataloaders( + train_X, + np.expand_dims(train_y, -1), + val_X, + np.expand_dims(val_y, -1), + test_X, + np.expand_dims(test_y, -1), + ) + set_random_seed(RANDOM_SEEDS[n_round]) + n_flatten_features = np.product(train_X.shape[1:]) + + # XGBoost model with imputation + xgb = XGBRegressor() + xgb.fit( + train_X.reshape(-1, n_flatten_features), + train_y, + eval_set=[(val_X.reshape(-1, n_flatten_features), val_y)], + verbose=False, + ) + predictions = xgb.predict(test_X.reshape(-1, n_flatten_features)) + xgb_metrics_collector["mae"].append(calc_mae(predictions, test_y)) + xgb_metrics_collector["mse"].append(calc_mse(predictions, test_y)) + xgb_metrics_collector["mre"].append(calc_mre(predictions, test_y)) + + # RNN model + simple_rnn_regressor = SimpleRNNRegressor( + n_features=train_X.shape[-1], rnn_hidden_size=128, n_out_features=1 + ) + simple_rnn_regressor = simple_rnn_regressor.to(args.device) + predictions = train(simple_rnn_regressor, train_loader, val_loader) + rnn_metrics_collector["mae"].append(calc_mae(predictions.squeeze(), test_y)) + rnn_metrics_collector["mse"].append(calc_mse(predictions.squeeze(), test_y)) + rnn_metrics_collector["mre"].append(calc_mre(predictions.squeeze(), test_y)) + + # Transformer model + transformer_classifier = TransformerRegressor( + n_features=train_X.shape[-1], + n_out_features=1, + n_layers=1, + d_model=64, + n_heads=2, + d_ffn=128, + dropout=0.1, + attn_dropout=0, + ) + transformer_classifier = transformer_classifier.to(args.device) + predictions = train(transformer_classifier, train_loader, val_loader) + transformer_metrics_collector["mae"].append( + calc_mae(predictions.squeeze(), test_y) + ) + transformer_metrics_collector["mse"].append( + calc_mse(predictions.squeeze(), test_y) + ) + transformer_metrics_collector["mre"].append( + calc_mre(predictions.squeeze(), test_y) + ) + + 
logger.info( + "\n" + f"XGB (with {naive_method} imputation) regression " + f"MAE: {np.mean(xgb_metrics_collector['mae']):.4f}±{np.std(xgb_metrics_collector['mae']):.4f}, " + f"MSE: {np.mean(xgb_metrics_collector['mse']):.4f}±{np.std(xgb_metrics_collector['mse']):.4f}, " + f"MRE: {np.mean(xgb_metrics_collector['mre']):.4f}±{np.std(xgb_metrics_collector['mre']):.4f}\n" + f"RNN (with {naive_method} imputation) regression " + f"MAE: {np.mean(rnn_metrics_collector['mae']):.4f}±{np.std(rnn_metrics_collector['mae']):.4f}, " + f"MSE: {np.mean(rnn_metrics_collector['mse']):.4f}±{np.std(rnn_metrics_collector['mse']):.4f}, " + f"MRE: {np.mean(rnn_metrics_collector['mre']):.4f}±{np.std(rnn_metrics_collector['mre']):.4f}\n" + f"Transformer (with {naive_method} imputation) regression " + f"MAE: {np.mean(transformer_metrics_collector['mae']):.4f}±{np.std(transformer_metrics_collector['mae']):.4f}, " + f"MSE: {np.mean(transformer_metrics_collector['mse']):.4f}±{np.std(transformer_metrics_collector['mse']):.4f}, " + f"MRE: {np.mean(transformer_metrics_collector['mre']):.4f}±{np.std(transformer_metrics_collector['mre']):.4f}\n" + )
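A minimal cross-check for the support-weighted one-vs-rest metrics introduced in downstream_classification.py, sketched with scikit-learn (an assumed extra dependency, not imported by these scripts). Note that pypots' calc_binary_classification_metrics may compute PR-AUC as the exact area under the precision-recall curve, whereas average_precision_score is a close but not identical estimator, so small numerical differences against calc_multiclass_classification_metrics are expected:

    import numpy as np
    from sklearn.metrics import average_precision_score, roc_auc_score

    def weighted_ovr_metrics_sketch(proba_predictions, y_true, n_classes):
        # sklearn's multiclass ROC-AUC requires rows that sum to 1; the RNN and
        # Transformer heads in these scripts use sigmoid, so normalize defensively.
        proba = proba_predictions / proba_predictions.sum(axis=1, keepdims=True)
        # Support-weighted one-vs-rest ROC-AUC, matching the per-class loop in
        # calc_multiclass_classification_metrics.
        weighted_roc_auc = roc_auc_score(
            y_true, proba, multi_class="ovr", average="weighted"
        )
        # Support-weighted one-vs-rest average precision as a PR-AUC proxy;
        # y_true is assumed to hold integer class labels.
        one_hot = np.eye(n_classes)[y_true.astype(int)]
        weighted_pr_auc = average_precision_score(one_hot, proba, average="weighted")
        return weighted_pr_auc, weighted_roc_auc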