diff --git a/examples/getting_started.ipynb b/examples/getting_started.ipynb new file mode 100644 index 0000000..866fb07 --- /dev/null +++ b/examples/getting_started.ipynb @@ -0,0 +1,372 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.8.10 64-bit ('ecnet': conda)" + }, + "interpreter": { + "hash": "b8ddbdeb4e8d258564393fa38a886a93a7bbb414136361bc7c3a5a1c29ceff9e" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of samples: 43\nNumber of QSPR descriptors per sample: 1875\n" + ] + } + ], + "source": [ + "# First, let's load our experimental cloud point data; we're using PaDEL-Descriptor to generate QSPR descriptors\n", + "\n", + "from ecnet.datasets import load_cp\n", + "\n", + "dataset = load_cp(as_dataset=True, backend='padel')\n", + "\n", + "print(f'Number of samples: {dataset.desc_vals.shape[0]}')\n", + "print(f'Number of QSPR descriptors per sample: {dataset.desc_vals.shape[1]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Number of samples in the training set: 34\nNumber of samples in the testing set: 9\n" + ] + } + ], + "source": [ + "# Now we create training and testing data subsets; our ANNs regress directly on the training data, and the test set is used to measure blind prediction accuracy\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from copy import deepcopy\n", + "\n", + "index_train, index_test = train_test_split([i for i in 
range(len(dataset))], test_size=0.2, random_state=24)\n", + "\n", + "dataset_train = deepcopy(dataset)\n", + "dataset_train.set_index(index_train)\n", + "\n", + "dataset_test = deepcopy(dataset)\n", + "dataset_test.set_index(index_test)\n", + "\n", + "print(f'Number of samples in the training set: {len(dataset_train)}')\n", + "print(f'Number of samples in the testing set: {len(dataset_test)}')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
" + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-30T12:35:48.301674\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Many QSPR descriptors are not important when predicting cloud point for a 
database of hydrocarbons and oxygenated compounds; for example, the descriptor counting the number of nitrogen atoms will be zero for all compounds. We will select the descriptors with the highest importance for predicting cloud point for use as ANN inputs, such that 95% of total descriptor importance (derived from random forest regression) is retained:\n", + "\n", + "from ecnet.tasks.feature_selection import select_rfr\n", + "from matplotlib import pyplot as plt\n", + "\n", + "# Note: we select based on the training set only, because we want the test set to be 100% blind\n", + "desc_idx, desc_imp = select_rfr(dataset_train, total_importance=0.95, n_estimators=50)\n", + "\n", + "dataset_train.set_desc_index(desc_idx)\n", + "dataset_test.set_desc_index(desc_idx)\n", + "\n", + "# Let's graph importance (individual and cumulative sum) for the selected descriptors:\n", + "rank = [i for i in range(len(desc_imp))]\n", + "tot_imp = [0.0]\n", + "for imp in desc_imp:\n", + " tot_imp.append(tot_imp[-1] + imp)\n", + "tot_imp = tot_imp[1:]\n", + "\n", + "plt.clf()\n", + "fig, ax = plt.subplots(constrained_layout=True)\n", + "ax.set_xlabel('Selected descriptor (most-to-least important)')\n", + "ax.set_ylabel('Descriptor importance')\n", + "ax.plot(rank, desc_imp, color='red')\n", + "ax2 = ax.twinx()\n", + "ax2.set_ylabel('Cumulative importance')\n", + "ax2.plot(rank, tot_imp, color='blue')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-30T12:35:48.486805\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# We observe that there are only a handful of QSPR descriptors with significant correlation to cloud point; let's visualize the relationship between kinematic viscosity and the descriptor with the highest importance:\n", + "\n", + "ysi = [dataset_train.target_vals[i][0] for i in range(len(dataset_train))]\n", + "top_desc = [dataset_train.desc_vals[i][0] for i in range(len(dataset_train))]\n", + "\n", + "plt.clf()\n", + "plt.xlabel(f'{dataset_train.desc_names[0]} value')\n", + "plt.ylabel('Experimental 
cloud point value (deg. C)')\n", + "plt.scatter(top_desc, ysi, color='blue')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch: 0 | Train loss: 3019.848388671875 | Valid loss: 9223372036854775807\n", + "Epoch: 10 | Train loss: 301.50048828125 | Valid loss: 571.7460327148438\n", + "Epoch: 20 | Train loss: 351.8019104003906 | Valid loss: 800.0086669921875\n", + "Epoch: 30 | Train loss: 330.5296325683594 | Valid loss: 539.5159301757812\n", + "Epoch: 40 | Train loss: 264.02142333984375 | Valid loss: 315.75274658203125\n", + "Epoch: 50 | Train loss: 198.6739501953125 | Valid loss: 249.75222778320312\n", + "Epoch: 60 | Train loss: 87.38817596435547 | Valid loss: 125.52989196777344\n", + "Epoch: 70 | Train loss: 59.61891555786133 | Valid loss: 100.54280090332031\n", + "Epoch: 80 | Train loss: 43.34950256347656 | Valid loss: 78.16502380371094\n", + "Epoch: 90 | Train loss: 60.24040985107422 | Valid loss: 91.14484405517578\n" + ] + } + ], + "source": [ + "# Enough exploration, let's train an ANN to predict cloud point:\n", + "\n", + "from ecnet import ECNet\n", + "\n", + "# Create an ANN with `n` input neurons (where `n` == number of selected QSPR descriptors), 2 hidden layers with 256 neurons each, and one output neuron (corresponding to cloud point)\n", + "model = ECNet(dataset_train.desc_vals.shape[1], 1, 256, 2)\n", + "# arguments follow [input dim, output dim, hidden dim, n hidden]\n", + "\n", + "# Train the ANN using training dataset, with a random 20% of the dataset used for validation every epoch:\n", + "train_loss, valid_loss = model.fit(\n", + " dataset=dataset_train, valid_size=0.2, verbose=10,\n", + " patience=32, epochs=300, random_state=None, shuffle=True,\n", + " lr=0.005\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + 
"data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-30T12:35:48.955909\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# We can visualize the training set's loss over training, as well as the validation subset's loss:\n", + "\n", + "from math import sqrt\n", + "\n", + "train_loss = [sqrt(l) for l in train_loss][5:]\n", + "valid_loss = [sqrt(l) for l in valid_loss][5:]\n", + "epoch = [i for i in range(len(train_loss))]\n", + "plt.clf()\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Sqrt(Loss)')\n", + "plt.plot(epoch, train_loss, color='blue', label='Training Loss')\n", + "plt.plot(epoch, valid_loss, color='red', label='Validation Loss')\n", + "plt.legend(loc='upper right')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + 
"outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training median absolute error: 4.776594161987305\nTraining r-squared coefficient: 0.8675852185693186\nTesting median absolute error: 9.086380004882812\nTesting r-squared coefficient: 0.8877325994065388\n" + ] + } + ], + "source": [ + "# Let's calculate median absolute error and r-squared coefficient for each dataset:\n", + "\n", + "from sklearn.metrics import median_absolute_error, r2_score\n", + "\n", + "y_hat_train = model(dataset_train.desc_vals).detach().numpy()\n", + "y_train = dataset_train.target_vals\n", + "train_mae = median_absolute_error(y_hat_train, y_train)\n", + "train_r2 = r2_score(y_hat_train, y_train)\n", + "y_hat_test = model(dataset_test.desc_vals).detach().numpy()\n", + "y_test = dataset_test.target_vals\n", + "test_mae = median_absolute_error(y_hat_test, y_test)\n", + "test_r2 = r2_score(y_hat_test, y_test)\n", + "print(f'Training median absolute error: {train_mae}')\n", + "print(f'Training r-squared coefficient: {train_r2}')\n", + "print(f'Testing median absolute error: {test_mae}')\n", + "print(f'Testing r-squared coefficient: {test_r2}')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-30T12:35:49.142649\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# Now we can visually compare predicted values to experimental values:\n", + "\n", + "plt.clf()\n", + "plt.xlabel('Experimental CP Value (deg. C)')\n", + "plt.ylabel('Predicted CP Value (deg. 
C)')\n", + "plt.scatter(y_train, y_hat_train, color='blue', label='Training Set')\n", + "plt.scatter(y_test, y_hat_test, color='red', label='Testing Set')\n", + "plt.legend(loc='upper left')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's save our model for later use:\n", + "\n", + "model.save('cp_model.pt')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Training median absolute error: 4.776594161987305\nTraining r-squared coefficient: 0.8675852185693186\nTesting median absolute error: 9.086380004882812\nTesting r-squared coefficient: 0.8877325994065388\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n \n \n \n \n 2021-06-30T12:35:49.338191\n image/svg+xml\n \n \n Matplotlib v3.4.2, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# And test to make sure we can recall it:\n", + "\n", + "from ecnet.model import load_model\n", + "\n", + "model_2 = load_model('cp_model.pt')\n", + "\n", + "y_hat_train = model_2(dataset_train.desc_vals).detach().numpy()\n", + "y_train = dataset_train.target_vals\n", + "train_mae = median_absolute_error(y_hat_train, 
y_train)\n", + "train_r2 = r2_score(y_hat_train, y_train)\n", + "y_hat_test = model_2(dataset_test.desc_vals).detach().numpy()\n", + "y_test = dataset_test.target_vals\n", + "test_mae = median_absolute_error(y_hat_test, y_test)\n", + "test_r2 = r2_score(y_hat_test, y_test)\n", + "print(f'Training median absolute error: {train_mae}')\n", + "print(f'Training r-squared coefficient: {train_r2}')\n", + "print(f'Testing median absolute error: {test_mae}')\n", + "print(f'Testing r-squared coefficient: {test_r2}')\n", + "\n", + "plt.clf()\n", + "plt.xlabel('Experimental CP Value (deg. C)')\n", + "plt.ylabel('Predicted CP Value (deg. C)')\n", + "plt.scatter(y_train, y_hat_train, color='blue', label='Training Set')\n", + "plt.scatter(y_test, y_hat_test, color='red', label='Testing Set')\n", + "plt.legend(loc='upper left')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file