diff --git a/examples/Gordo-Reporters-MlFlow.ipynb b/examples/Gordo-Reporters-MlFlow.ipynb new file mode 100644 index 000000000..2448141c1 --- /dev/null +++ b/examples/Gordo-Reporters-MlFlow.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using the Gordo Mlflow reporter with AzureML\n", + "\n", + "## Building on a cluster\n", + "When a gordo workflow is generated from a YAML config using `kubectl apply -f config.yml`, the model is built by the model builder pod. If a remote logging \"reporter\" was configured in the `config.yml`, then at the end of the model building step the metadata will be logged with the specified reporter. \n", + "\n", + "**Note**\n", + "When using the MLflow reporter, the cluster running the workflow must have the AzureML workspace credentials set to the environment variable `AZUREML_WORKSPACE_STR` as well as the `DL_SERVICE_AUTH_STR`.\n", + "\n", + "The cluster should use the workspace credentials associated with the deployment stage associated with that cluster, e.g. \"production\", \"staging\", \"testing\", etc.\n", + "\n", + "While reporters can be defined in the globals runtime when using the workflow generator, they must be defined by machine when building locally." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.core.authentication import InteractiveLoginAuthentication\n", + "import mlflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_str = \"\"\"\n", + "apiVersion: equinor.com/v1\n", + "kind: Gordo\n", + "metadata:\n", + " name: test-project\n", + "spec:\n", + " deploy-version: 0.50.0\n", + " config:\n", + " machines:\n", + " - dataset:\n", + " tags:\n", + " - TRA-35TT8566.PV\n", + " - TRA-35TT8567.PV\n", + " target_tag_list:\n", + " - TRA-35TT8568.PV\n", + " - TRA-35TT8569.PV\n", + " train_end_date: '2019-03-01T00:00:00+00:00'\n", + " train_start_date: '2019-01-01T00:00:00+00:00'\n", + " data_provider: \n", + " interactive: True\n", + " metadata:\n", + " information: 'Use RandomForestRegressor to predict separate set of tags.'\n", + " model:\n", + " gordo.machine.model.anomaly.diff.DiffBasedAnomalyDetector:\n", + " base_estimator:\n", + " sklearn.compose.TransformedTargetRegressor:\n", + " transformer: sklearn.preprocessing.data.MinMaxScaler\n", + " regressor:\n", + " sklearn.pipeline.Pipeline:\n", + " steps:\n", + " - sklearn.decomposition.pca.PCA\n", + " - sklearn.multioutput.MultiOutputRegressor:\n", + " estimator:\n", + " sklearn.ensemble.forest.RandomForestRegressor:\n", + " n_estimators: 35\n", + " max_depth: 10\n", + " name: supervised-random-forest-anomaly\n", + " # During local building, reporters must be defined by machine\n", + " runtime:\n", + " reporters:\n", + " - gordo.reporters.mlflow.MlFlowReporter\n", + "globals:\n", + " runtime:\n", + " builder:\n", + " # The following is the default behavior, just shown here for example\n", + " remote_logging:\n", + " enable: False\n", + " \"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Building 
locally\n", + "\n", + "To build machines locally, but log remotely, configure the `AZUREML_WORKSPACE_STR` and `DL_SERVICE_AUTH_STR` as described above, then run the config file with the reporter configuration in `gordo.builder.local_build.local_build` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gordo.builder.local_build import local_build\n", + "import os\n", + "\n", + "# This downloads 1yr of data from the datalake\n", + "# so it will of course take some time\n", + "model, machine = next(local_build(config_str))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# During a deployment, the CLI build method calls the reporters.\n", + "# In a local build, we'll do that manually\n", + "machine.report()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reviewing results\n", + "\n", + "## AzureML Frontend\n", + "\n", + "The AzureML frontend can be helpful for quickly checking that your results appear to be populating correctly, for example during a gordo deployment. [Portal Link](https://ml.azure.com/?wsid=/subscriptions/019958ea-fe2c-4e14-bbd9-0d2db8ed7cfc/resourcegroups/gordo-ml-workspace-poc-rg/workspaces/gordo-ml-workspace-poc-ml)\n", + "\n", + "## Querying with MlflowClient\n", + "\n", + "\n", + "**First**, install the following packages to your working environment.\n", + "\n", + "* `azureml-contrib-run`\n", + "* `azureml-mlflow`\n", + "* `azureml-widgets`\n", + "\n", + "Results can be queried using the `mlflow` client library. 
The following are just some general examples, but you can find further documentation on the client [here](https://www.mlflow.org/docs/latest/tracking.html#querying-runs-programmatically) as well as API documentation [here](https://www.mlflow.org/docs/latest/python_api/mlflow.tracking.html).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get AzureML tracking URI for Workspace\n", + "# the same values you used when building\n", + "workspace_kwargs = { \n", + " \"subscription_id\":\"value\", \n", + " \"resource_group\": \"value\", \n", + " \"workspace_name\": \"value\",\n", + " \"auth\": InteractiveLoginAuthentication(force=True)\n", + " }\n", + "ws = Workspace(**workspace_kwargs)\n", + "tracking_uri = ws.get_mlflow_tracking_uri()\n", + "\n", + "# Configure a client for querying\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "client = mlflow.tracking.MlflowClient(tracking_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Experiments\n", + "Each build of a machine corresponds to a new run for an experiment with that machine's name. With each subsequent deployment, there will be a new run under each built machine's name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all experiments (can take a bit)\n", + "experiments = client.list_experiments()\n", + "\n", + "for exp in experiments[-5::]:\n", + " print(exp.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get a single experiment by name\n", + "print(client.get_experiment_by_name(\"hta-3-ae-long\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Find experiments matching some pattern\n", + "experiment_ids = [exp.experiment_id for exp in experiments if exp.name.startswith(\"hta\")]\n", + "\n", + "for exp_id in experiment_ids[-5::]:\n", + " print(exp_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Runs\n", + "Searching of Runs can be performed with some [built-in arguments](https://www.mlflow.org/docs/latest/python_api/mlflow.tracking.html#mlflow.tracking.MlflowClient.search_runs), or with basic SQL select queries passed to the `filter_string` argument. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "## Using order by a metric\n", + "runs = client.search_runs(experiment_ids=experiment_ids, max_results=50, order_by=[\"metrics.r_2\"])\n", + "\n", + "print(\"Number of runs:\", len(runs))\n", + "print(\"Example:\", runs[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Using an SQL filter string\n", + "experiment_ids = [exp.experiment_id for exp in experiments if exp.name.startswith(\"test_b\")]\n", + "runs = client.search_runs(experiment_ids=experiment_ids, filter_string='metrics.r_2 < 0.2', max_results=10) \n", + "\n", + "print(\"Number of runs:\", len(runs))\n", + "print(\"Example:\", runs[-1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are some handy tools using the `azureml-sdk` as well. For example, you can bring up a widget displaying information about a run, and get metrics as iterables." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.run import Run\n", + "\n", + "experiment = Experiment(ws, experiments[-80].name)\n", + "azure_run = next(experiment.get_runs())\n", + "RunDetails(azure_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Or do some things yourself\n", + "metrics = azure_run.get_metrics()\n", + "print(metrics.keys())\n", + "plt.plot(range(len(metrics[\"accuracy\"])), metrics[\"accuracy\"])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(azure_run.properties)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Artifacts\n", + "Artifacts are files, such as JSON, images, pickled models, etc. The following are examples of explicitly uploading and downloading them on AzureML with a given `run_id`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import uuid\n", + "import json\n", + "import shutil\n", + "\n", + "run_id = client.list_run_infos(exp.experiment_id)[-1].run_id\n", + "art_id = f\"{uuid.uuid4().hex}\"\n", + "\n", + "# Upload artifacts\n", + "local_path = os.path.abspath(f\"./{exp.name}_{run_id}/\")\n", + "if os.path.isdir(local_path):\n", + " shutil.rmtree(local_path)\n", + "os.makedirs(local_path, exist_ok=True)\n", + "\n", + "json.dump({\"a\": 42.0, \"b\":\"text\"}, open(os.path.join(local_path, f\"{art_id}.json\"), \"w\"))\n", + "\n", + "client.log_artifacts(run_id, local_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get artifacts for a given Run\n", + "artifacts = client.list_artifacts(run_id)\n", + "\n", + "# Make a new path to save these to\n", + "new_local_path = os.path.join(local_path, \"downloaded\")\n", + "os.makedirs(new_local_path, exist_ok=True)\n", + "\n", + "# Iterate over Run's artifacts and save them\n", + "for f in artifacts:\n", + " client.download_artifacts(run_id=run_id, path=f.path, dst_path=local_path)\n", + " print(\"Downloaded:\", f)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/gordo/reporters/mlflow.py b/gordo/reporters/mlflow.py index d8e01014c..b2efdbfa8 100644 --- a/gordo/reporters/mlflow.py +++ b/gordo/reporters/mlflow.py @@ -284,7 +284,7 @@ def get_batch_kwargs(machine: Machine) -> dict: return {"metrics": metric_list, "params": param_list} -def 
get_kwargs_from_secret(secret_str: str, keys: List[str]) -> dict: +def get_kwargs_from_secret(keys: List[str], secret_str: Optional[str] = None) -> dict: """ Get keyword arguments dictionary from secrets environment variable @@ -344,7 +344,6 @@ def get_spauth_kwargs() -> dict: AzureML ServicePrincipalAuthentication keyword arguments. See :func:`gordo.builder.mlflow_utils.get_mlflow_client` """ - secret_str = os.getenv("DL_SERVICE_AUTH_STR") return ( get_kwargs_from_secret(