diff --git a/README.md b/README.md index 75a1d43..dd8b664 100644 --- a/README.md +++ b/README.md @@ -45,11 +45,52 @@ To savour the flavours of `fusilli`, you can install it using pip: pip install fusilli ``` -## How to Cite +## Quick Start + +Here is a quick example of how to use `fusilli` to train a regression model and plot the real values vs. predicted +values. + +``` + from fusilli.data import prepare_fusion_data + from fusilli.train import train_and_save_models + from fusilli.eval import RealsVsPreds + import matplotlib.pyplot as plt + + # Import the example fusion model + from fusilli.fusionmodels.tabularfusion.example_model import ExampleModel + + data_paths = { + "tabular1": "path/to/tabular_1.csv", + "tabular2": "path/to/tabular_2.csv", + "image": "path/to/image_file.pt", + } + + output_paths = { + "checkpoints": "path/to/checkpoints/dir", + "losses": "path/to/losses/dir", + "figures": "path/to/figures/dir", + } + + # Get the data ready + data_module = prepare_fusion_data(prediction_task="regression", + fusion_model=ExampleModel, + data_paths=data_paths, + output_paths=output_paths) + + # Train the model + trained_model = train_and_save_models(data_module=data_module, + fusion_model=ExampleModel) + + # Evaluate the model by plotting the real values vs. predicted values + RealsVsPreds_figure = RealsVsPreds.from_final_val_data(trained_model) + plt.show() +``` -Florence Townend, Patrick J. Roddy, & Philipp Goebl. (2024). florencejt/fusilli: Fusilli v1.1.0 (v1.1.0). Zenodo. https://doi.org/10.5281/zenodo.10463697 +## How to Cite +Florence Townend, Patrick J. Roddy, & Philipp Goebl. (2024). florencejt/fusilli: Fusilli v1.1.0 (v1.1.0). +Zenodo. https://doi.org/10.5281/zenodo.10463697 ## Contribute! 
diff --git a/docs/auto_examples/training_and_testing/images/thumb/sphx_glr_plot_using_external_data_thumb.png b/docs/auto_examples/training_and_testing/images/thumb/sphx_glr_plot_using_external_data_thumb.png new file mode 100644 index 0000000..b06c4e6 Binary files /dev/null and b/docs/auto_examples/training_and_testing/images/thumb/sphx_glr_plot_using_external_data_thumb.png differ diff --git a/docs/auto_examples/training_and_testing/plot_using_external_data.ipynb b/docs/auto_examples/training_and_testing/plot_using_external_data.ipynb new file mode 100644 index 0000000..9a07d1c --- /dev/null +++ b/docs/auto_examples/training_and_testing/plot_using_external_data.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n# Using External Test Data\n\nLet's learn how to use external test data with Fusilli!\nSome guidance can also be found in the `Data Loading ` section of the documentation.\n\nThe extra step that we need to take is to provide the paths to the test data files to the functions that create evaluation figures: :class:`~fusilli.eval.RealsVsPreds.from_new_data`, :class:`~fusilli.eval.ConfusionMatrix.from_new_data`, :class:`~fusilli.eval.ModelComparison.from_new_data`.\n\n

<div class=\"alert alert-info\"><h4>Note</h4><p>It is not possible to use external test data with graph-based fusion models.</p></div>

\n\n\nWe'll rush through the first few steps of the training and testing process, as they are covered in more detail in the other example notebooks.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\nfrom tqdm.auto import tqdm\nimport os\n\nfrom docs.examples import generate_sklearn_simulated_data\nfrom fusilli.data import prepare_fusion_data\nfrom fusilli.eval import RealsVsPreds, ModelComparison\nfrom fusilli.train import train_and_save_models\nfrom fusilli.utils.model_chooser import import_chosen_fusion_models\n\n# sphinx_gallery_thumbnail_number = -1\n\n\nmodel_conditions = {\n \"class_name\": [\"ConcatTabularData\"],\n}\n\nfusion_models = import_chosen_fusion_models(model_conditions)\n\n# Regression task\nprediction_task = \"regression\"\n\n# Set the batch size\nbatch_size = 48\n\n# Setting output directories\noutput_paths = {\n \"losses\": \"loss_logs/external_data\",\n \"checkpoints\": \"checkpoints/external_data\",\n \"figures\": \"figures/external_data\",\n}\n\nfor dir in output_paths.values():\n os.makedirs(dir, exist_ok=True)\n\n# Clearing the loss logs directory (only for the example notebooks)\nfor dir in os.listdir(output_paths[\"losses\"]):\n # remove files\n for file in os.listdir(os.path.join(output_paths[\"losses\"], dir)):\n os.remove(os.path.join(output_paths[\"losses\"], dir, file))\n # remove dir\n os.rmdir(os.path.join(output_paths[\"losses\"], dir))\n\ntabular1_path, tabular2_path = generate_sklearn_simulated_data(prediction_task,\n num_samples=500,\n num_tab1_features=10,\n num_tab2_features=20)\n\nexternal_tabular1_path, external_tabular2_path = generate_sklearn_simulated_data(prediction_task,\n num_samples=100,\n num_tab1_features=10,\n num_tab2_features=20,\n external=True)\ndata_paths = {\n \"tabular1\": tabular1_path,\n \"tabular2\": tabular2_path,\n \"image\": \"\",\n}\n\nexternal_data_paths = {\n \"tabular1\": 
external_tabular1_path,\n \"tabular2\": external_tabular2_path,\n \"image\": \"\",\n}\n\nfusion_model = fusion_models[0]\n\nprint(\"Method name:\", fusion_model.method_name)\nprint(\"Modality type:\", fusion_model.modality_type)\nprint(\"Fusion type:\", fusion_model.fusion_type)\n\n# Create the data module\ndm = prepare_fusion_data(prediction_task=prediction_task,\n fusion_model=fusion_model,\n data_paths=data_paths,\n output_paths=output_paths,\n batch_size=batch_size, )\n\n# train and test\ntrained_model = train_and_save_models(\n data_module=dm,\n fusion_model=fusion_model,\n enable_checkpointing=True,\n show_loss_plot=True,\n)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluating with validation data\nWe'll start by evaluating the model with the validation data.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "reals_preds_validation = RealsVsPreds.from_final_val_data(trained_model)\nplt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluating with external data\nNow we'll evaluate the model with the external data.\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "reals_preds_external = RealsVsPreds.from_new_data(trained_model,\n output_paths=output_paths,\n test_data_paths=external_data_paths)\nplt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Removing checkpoint files\n\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "for dir in os.listdir(output_paths[\"checkpoints\"]):\n # remove files\n os.remove(os.path.join(output_paths[\"checkpoints\"], dir))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, 
+ "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/docs/auto_examples/training_and_testing/plot_using_external_data.py b/docs/auto_examples/training_and_testing/plot_using_external_data.py new file mode 100644 index 0000000..7942b7a --- /dev/null +++ b/docs/auto_examples/training_and_testing/plot_using_external_data.py @@ -0,0 +1,128 @@ +""" +Using External Test Data +======================================================================== + +Let's learn how to use external test data with Fusilli! +Some guidance can also be found in the :ref:`Data Loading ` section of the documentation. + +The extra step that we need to take is to provide the paths to the test data files to the functions that create evaluation figures: :class:`~fusilli.eval.RealsVsPreds.from_new_data`, :class:`~fusilli.eval.ConfusionMatrix.from_new_data`, :class:`~fusilli.eval.ModelComparison.from_new_data`. + +.. note:: + + It is not possible to use external test data with graph-based fusion models. + + +We'll rush through the first few steps of the training and testing process, as they are covered in more detail in the other example notebooks. 
+ +""" + +import matplotlib.pyplot as plt +from tqdm.auto import tqdm +import os + +from docs.examples import generate_sklearn_simulated_data +from fusilli.data import prepare_fusion_data +from fusilli.eval import RealsVsPreds, ModelComparison +from fusilli.train import train_and_save_models +from fusilli.utils.model_chooser import import_chosen_fusion_models + +# sphinx_gallery_thumbnail_number = -1 + + +model_conditions = { + "class_name": ["ConcatTabularData"], +} + +fusion_models = import_chosen_fusion_models(model_conditions) + +# Regression task +prediction_task = "regression" + +# Set the batch size +batch_size = 48 + +# Setting output directories +output_paths = { + "losses": "loss_logs/external_data", + "checkpoints": "checkpoints/external_data", + "figures": "figures/external_data", +} + +for dir in output_paths.values(): + os.makedirs(dir, exist_ok=True) + +# Clearing the loss logs directory (only for the example notebooks) +for dir in os.listdir(output_paths["losses"]): + # remove files + for file in os.listdir(os.path.join(output_paths["losses"], dir)): + os.remove(os.path.join(output_paths["losses"], dir, file)) + # remove dir + os.rmdir(os.path.join(output_paths["losses"], dir)) + +tabular1_path, tabular2_path = generate_sklearn_simulated_data(prediction_task, + num_samples=500, + num_tab1_features=10, + num_tab2_features=20) + +external_tabular1_path, external_tabular2_path = generate_sklearn_simulated_data(prediction_task, + num_samples=100, + num_tab1_features=10, + num_tab2_features=20, + external=True) +data_paths = { + "tabular1": tabular1_path, + "tabular2": tabular2_path, + "image": "", +} + +external_data_paths = { + "tabular1": external_tabular1_path, + "tabular2": external_tabular2_path, + "image": "", +} + +fusion_model = fusion_models[0] + +print("Method name:", fusion_model.method_name) +print("Modality type:", fusion_model.modality_type) +print("Fusion type:", fusion_model.fusion_type) + +# Create the data module +dm = 
prepare_fusion_data(prediction_task=prediction_task, + fusion_model=fusion_model, + data_paths=data_paths, + output_paths=output_paths, + batch_size=batch_size, ) + +# train and test +trained_model = train_and_save_models( + data_module=dm, + fusion_model=fusion_model, + enable_checkpointing=True, + show_loss_plot=True, +) + +# %% +# Evaluating with validation data +# ----------------------------------------------- +# We'll start by evaluating the model with the validation data. + +reals_preds_validation = RealsVsPreds.from_final_val_data(trained_model) +plt.show() + +# %% +# Evaluating with external data +# ---------------------------------------------- +# Now we'll evaluate the model with the external data. + +reals_preds_external = RealsVsPreds.from_new_data(trained_model, + output_paths=output_paths, + test_data_paths=external_data_paths) +plt.show() + +# %% +# Removing checkpoint files + +for dir in os.listdir(output_paths["checkpoints"]): + # remove files + os.remove(os.path.join(output_paths["checkpoints"], dir)) diff --git a/docs/auto_examples/training_and_testing/plot_using_external_data.rst b/docs/auto_examples/training_and_testing/plot_using_external_data.rst new file mode 100644 index 0000000..1b5cbd8 --- /dev/null +++ b/docs/auto_examples/training_and_testing/plot_using_external_data.rst @@ -0,0 +1,227 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/training_and_testing/plot_using_external_data.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_training_and_testing_plot_using_external_data.py: + + +Using External Test Data +======================================================================== + +Let's learn how to use external test data with Fusilli! 
+Some guidance can also be found in the :ref:`Data Loading ` section of the documentation. + +The extra step that we need to take is to provide the paths to the test data files to the functions that create evaluation figures: :class:`~fusilli.eval.RealsVsPreds.from_new_data`, :class:`~fusilli.eval.ConfusionMatrix.from_new_data`, :class:`~fusilli.eval.ModelComparison.from_new_data`. + +.. note:: + + It is not possible to use external test data with graph-based fusion models. + + +We'll rush through the first few steps of the training and testing process, as they are covered in more detail in the other example notebooks. + +.. GENERATED FROM PYTHON SOURCE LINES 18-105 + +.. code-block:: Python + + + import matplotlib.pyplot as plt + from tqdm.auto import tqdm + import os + + from docs.examples import generate_sklearn_simulated_data + from fusilli.data import prepare_fusion_data + from fusilli.eval import RealsVsPreds, ModelComparison + from fusilli.train import train_and_save_models + from fusilli.utils.model_chooser import import_chosen_fusion_models + + # sphinx_gallery_thumbnail_number = -1 + + + model_conditions = { + "class_name": ["ConcatTabularData"], + } + + fusion_models = import_chosen_fusion_models(model_conditions) + + # Regression task + prediction_task = "regression" + + # Set the batch size + batch_size = 48 + + # Setting output directories + output_paths = { + "losses": "loss_logs/external_data", + "checkpoints": "checkpoints/external_data", + "figures": "figures/external_data", + } + + for dir in output_paths.values(): + os.makedirs(dir, exist_ok=True) + + # Clearing the loss logs directory (only for the example notebooks) + for dir in os.listdir(output_paths["losses"]): + # remove files + for file in os.listdir(os.path.join(output_paths["losses"], dir)): + os.remove(os.path.join(output_paths["losses"], dir, file)) + # remove dir + os.rmdir(os.path.join(output_paths["losses"], dir)) + + tabular1_path, tabular2_path = 
generate_sklearn_simulated_data(prediction_task, + num_samples=500, + num_tab1_features=10, + num_tab2_features=20) + + external_tabular1_path, external_tabular2_path = generate_sklearn_simulated_data(prediction_task, + num_samples=100, + num_tab1_features=10, + num_tab2_features=20, + external=True) + data_paths = { + "tabular1": tabular1_path, + "tabular2": tabular2_path, + "image": "", + } + + external_data_paths = { + "tabular1": external_tabular1_path, + "tabular2": external_tabular2_path, + "image": "", + } + + fusion_model = fusion_models[0] + + print("Method name:", fusion_model.method_name) + print("Modality type:", fusion_model.modality_type) + print("Fusion type:", fusion_model.fusion_type) + + # Create the data module + dm = prepare_fusion_data(prediction_task=prediction_task, + fusion_model=fusion_model, + data_paths=data_paths, + output_paths=output_paths, + batch_size=batch_size, ) + + # train and test + trained_model = train_and_save_models( + data_module=dm, + fusion_model=fusion_model, + enable_checkpointing=True, + show_loss_plot=True, + ) + + + +.. rst-class:: sphx-glr-script-out + +.. 
code-block:: pytb + + Traceback (most recent call last): + File "/Users/florencetownend/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Projects/fusilli/docs/examples/training_and_testing/plot_using_external_data.py", line 36, in <module> + fusion_models = import_chosen_fusion_models(model_conditions) + File "/Users/florencetownend/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Projects/fusilli/fusilli/utils/model_chooser.py", line 323, in import_chosen_fusion_models + imported_models = get_models(model_conditions, skip_models) + File "/Users/florencetownend/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Projects/fusilli/fusilli/utils/model_chooser.py", line 194, in get_models + fusion_models, fusion_model_dict_without_skips = all_model_importer(fusion_model_dict, skip_models=skip_models) + File "/Users/florencetownend/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Projects/fusilli/fusilli/utils/model_chooser.py", line 125, in all_model_importer + module = importlib.import_module(module_path) + File "/Users/florencetownend/miniforge3/envs/fusion_eval/lib/python3.9/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "<frozen importlib._bootstrap>", line 1030, in _gcd_import + File "<frozen importlib._bootstrap>", line 1007, in _find_and_load + File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked + File "<frozen importlib._bootstrap>", line 680, in _load_unlocked + File "<frozen importlib._bootstrap_external>", line 850, in exec_module + File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed + File "/Users/florencetownend/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Projects/fusilli/fusilli/fusionmodels/tabularfusion/mcvae_model.py", line 9, in <module> + from fusilli.utils.mcvae.src.mcvae.models import Mcvae + ImportError: cannot import name 'Mcvae' from 'fusilli.utils.mcvae.src.mcvae.models' (unknown location) + + + + +.. GENERATED FROM PYTHON SOURCE LINES 106-109 + +Evaluating with validation data +----------------------------------------------- +We'll start by evaluating the model with the validation data. + +..
GENERATED FROM PYTHON SOURCE LINES 109-113 + +.. code-block:: Python + + + reals_preds_validation = RealsVsPreds.from_final_val_data(trained_model) + plt.show() + + +.. GENERATED FROM PYTHON SOURCE LINES 114-117 + +Evaluating with external data +---------------------------------------------- +Now we'll evaluate the model with the external data. + +.. GENERATED FROM PYTHON SOURCE LINES 117-123 + +.. code-block:: Python + + + reals_preds_external = RealsVsPreds.from_new_data(trained_model, + output_paths=output_paths, + test_data_paths=external_data_paths) + plt.show() + + +.. GENERATED FROM PYTHON SOURCE LINES 124-125 + +Removing checkpoint files + +.. GENERATED FROM PYTHON SOURCE LINES 125-129 + +.. code-block:: Python + + + for dir in os.listdir(output_paths["checkpoints"]): + # remove files + os.remove(os.path.join(output_paths["checkpoints"], dir)) + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 0.003 seconds) + + +.. _sphx_glr_download_auto_examples_training_and_testing_plot_using_external_data.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_using_external_data.ipynb <plot_using_external_data.ipynb>` + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_using_external_data.py <plot_using_external_data.py>` + + +.. only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_ diff --git a/docs/auto_examples/training_and_testing/plot_using_external_data_codeobj.pickle b/docs/auto_examples/training_and_testing/plot_using_external_data_codeobj.pickle new file mode 100644 index 0000000..c7a98d0 Binary files /dev/null and b/docs/auto_examples/training_and_testing/plot_using_external_data_codeobj.pickle differ