diff --git a/.gitignore b/.gitignore index 4dbd91a8..709219e1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__ docs/build docs/source/*.ipynb docs/source/out +docs/source/data .coverage .coverage.* .cache/** diff --git a/docs/Makefile b/docs/Makefile index 66381709..f0999e20 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -18,12 +18,19 @@ html: prepare # notebooks to the source folder so nbsphinx can build docs off of them prepare: rm -rf ./source/rubicon-root ./source/*.ipynb + rm -rf ./source/data + cp ../notebooks/git-integration.ipynb ./source cp ../notebooks/logging-basics.ipynb ./source cp ../notebooks/logging-asynchronously.ipynb ./source cp ../notebooks/logging-concurrently.ipynb ./source cp ../notebooks/logging-training-metadata.ipynb ./source cp ../notebooks/logging-in-prefect-flows.ipynb ./source + cp ../notebooks/visualizing-logged-dataframes.ipynb ./source + + # some notebooks require sample data + mkdir ./source/data + cp ../notebooks/data/revenue_data.csv ./source/data # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
diff --git a/docs/docs-environment.yml b/docs/docs-environment.yml index 2d8dba4a..fdba23e8 100644 --- a/docs/docs-environment.yml +++ b/docs/docs-environment.yml @@ -13,6 +13,7 @@ dependencies: - ipykernel - jupyter_client - scikit-learn + - hvplot - pip: - -e ../[all] diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 5dd7c936..2f827571 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -20,4 +20,9 @@ text-align: right; font-size: 20px; margin-top: -14px; +} + +/* unknown warning was showing, manually hiding */ +#Visualizing-Logged-Dataframes .admonition.warning { + display: none; } \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 6eea17bd..d05e6d3e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -82,7 +82,8 @@ # The theme to use for HTML and HTML Help pages. html_theme = "furo" # hide rubicon because it's already in the logo, but will still get pulled into the tab -html_title = f"
v{version}
" +# strip the version down so we don't include dirty tags +html_title = f"
v{version[:5]}
" html_static_path = ["_static"] html_css_files = ["custom.css"] html_theme_options = { diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 6aa5bd8c..598642ec 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -13,6 +13,7 @@ Rubicon Basics logging-basics logging-training-metadata + visualizing-logged-dataframes Advanced Features ----------------- diff --git a/notebooks/data/revenue_data.csv b/notebooks/data/revenue_data.csv new file mode 100644 index 00000000..f478bc89 --- /dev/null +++ b/notebooks/data/revenue_data.csv @@ -0,0 +1,6 @@ +Company,Revenue (in millions) +Walmart,514405 +Exxon Mobil,290212 +Apple,265595 +Berkshire Hathaway,247837 +Amazon.com,232887 diff --git a/notebooks/visualizing-logged-dataframes.ipynb b/notebooks/visualizing-logged-dataframes.ipynb new file mode 100644 index 00000000..cf648a6b --- /dev/null +++ b/notebooks/visualizing-logged-dataframes.ipynb @@ -0,0 +1,96 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualizing Logged Dataframes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A `dataframe` is a two-dimensional, tabular dataset with labeled axes (rows and columns) that provides value to the model developer or reviewer when visualized. `rubicon` makes it easy to log dataframes and comes with default visualization support, using [hvplot](https://hvplot.holoviz.org/index.html) under the hood. `hvplot` provides an interactive Bokeh-based plotting API that supports panning, zooming, hovering, and clickable/selectable legends." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Let's create a project, log a sample dataframe to it, and then visualize it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from rubicon import Rubicon\n", + "\n", + "rubicon = Rubicon(persistence=\"filesystem\", root_dir=\"./rubicon-root\")\n", + "project = rubicon.get_or_create_project(\"Plotting Example\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualizing Dataframes\n", + "\n", + "`Dataframe.plot` exposes plotting functionality to create simple plots like line, bar, scatter, or table plots." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(f'./data/revenue_data.csv', index_col=0)\n", + "\n", + "experiment = project.log_experiment(name=\"plotting experiment\")\n", + "dataframe = experiment.log_dataframe(df, description=\"sample revenue df\")\n", + "\n", + "# any args will be passed to hvplot, allowing customization\n", + "revenue_line = dataframe.plot(kind=\"line\", x=\"Company\", \n", + " y=\"Revenue (in millions)\", width=600)\n", + "revenue_scatter = dataframe.plot(kind=\"scatter\", x=\"Company\", \n", + " y=\"Revenue (in millions)\", width=600)\n", + "\n", + "# compatible with holoviews composition\n", + "(revenue_line + revenue_scatter).cols(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + }, + "nbsphinx": { + "execute": "always" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/rubicon/client/dataframe.py b/rubicon/client/dataframe.py index 2dbc5731..58d8d952 100644 --- a/rubicon/client/dataframe.py +++ b/rubicon/client/dataframe.py @@ -1,4 
+1,5 @@
 from rubicon.client import Base, TagMixin
+from rubicon.exceptions import RubiconException
 
 
 class Dataframe(Base, TagMixin):
@@ -38,9 +39,45 @@ def _get_data(self):
             project_name, self.id, experiment_id=experiment_id
         )
 
-    def plot(self, kind="table", **kwargs):
-        """Render the dataframe."""
-        raise NotImplementedError
+    def plot(self, **kwargs):
+        """Render the dataframe using `hvplot`.
+
+        Parameters
+        ----------
+        kwargs : dict
+            Additional keyword arguments to be passed along to the
+            `hvplot` function.
+
+        Returns
+        -------
+        object
+            Whatever `self.data.hvplot(**kwargs)` returns.
+
+        Raises
+        ------
+        RubiconException
+            If `hvplot.dask` cannot be imported.
+
+        Notes
+        -----
+        For usage, visit: https://hvplot.holoviz.org/user_guide/Plotting.html
+        For customizations, visit:
+        https://hvplot.holoviz.org/user_guide/Customization.html
+
+        Examples
+        --------
+        >>> # Render a line plot
+        >>> dataframe.plot(kind='line', x='Year', y='Number of Subscriptions')
+        """
+        try:
+            # data is a dask dataframe
+            import hvplot.dask  # noqa F401
+        except ImportError as err:
+            # chain explicitly so the original import failure stays visible
+            raise RubiconException(
+                "`hvplot` is required for plotting. Install with `pip install hvplot`."
+            ) from err
+
+        return self.data.hvplot(**kwargs)
 
     @property
     def id(self):