diff --git a/docs/source/tutorial_notebooks/pipelines.ipynb b/docs/source/tutorial_notebooks/pipelines.ipynb
index b82bd50e..679579ac 100644
--- a/docs/source/tutorial_notebooks/pipelines.ipynb
+++ b/docs/source/tutorial_notebooks/pipelines.ipynb
@@ -36,8 +36,20 @@
    },
    "outputs": [],
    "source": [
+    "# Come up with a random owner name to avoid clashes\n",
+    "from random import randint\n",
+    "OWNER = \"tutorial_\" + str(randint(0, int(1e6)))\n",
+    "\n",
     "import dataregistry\n",
-    "print(\"Working with dataregistry version:\", dataregistry.__version__)"
+    "print(f\"Working with dataregistry version: {dataregistry.__version__} as random owner {OWNER}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "18da2a5b-c3e4-4528-9197-1316388ee397",
+   "metadata": {},
+   "source": [
+    "**Note** that running some of the cells below may fail, especially if they are run multiple times. This will most likely be due to clashes with the unique constraints within the database (the error output should be informative). If this happens, either (1) re-run the cell above to generate a new random owner and re-establish the database connection, or (2) manually change the conflicting database column(s) during registration."
    ]
   },
   {
@@ -65,8 +77,8 @@
    "source": [
     "from dataregistry import DataRegistry\n",
     "\n",
-    "# Establish connection to database (using defaults)\n",
-    "datareg = DataRegistry()\n",
+    "# Establish connection to the tutorial schema\n",
+    "datareg = DataRegistry(schema=\"tutorial_working\", owner=OWNER)\n",
     "\n",
     "# Register a new execution\n",
     "ex1_id = datareg.Registrar.execution.register(\n",
@@ -98,14 +110,13 @@
    "source": [
     "# Register a dataset, chosing what execution it is associated with\n",
     "dataset_id, execution_id = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_1p1/\",\n",
+    "    \"pipeline_tutorial:dataset_0p1\",\n",
     "    \"0.0.1\",\n",
     "    description=\"A directory structure output from pipeline stage 1\",\n",
     "    old_location=\"/somewhere/on/machine/my-dataset/\",\n",
     "    execution_id=ex1_id,\n",
-    "    name=\"Dataset 1.1\",\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True\n",
+    "    location_type=\"dummy\"\n",
     ")\n",
     "\n",
     "print(f\"Dataset {dataset_id} created, associated with execution {execution_id}\")"
@@ -118,7 +129,7 @@
    "source": [
     "This is largely the same as the previous tutorial for registering a dataset, however now we are manually specifying the parent execution (`execution_id=ex1_id`).\n",
     "\n",
-    "Note `is_dummy=True` is a flag to ignore the data at `old_location` (i.e., nothing is copied), and just create an entry in the database. This is a flag for testing purposes only."
+    "Note `location_type=\"dummy\"` is a flag to ignore the data at `old_location` (i.e., nothing is copied) and just create an entry in the database; it is intended for testing purposes only."
    ]
   },
   {
@@ -142,13 +153,12 @@
    "source": [
     "# Create a dataset and execution at the same time\n",
     "dataset_id, execution_id = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_1p1_with_execution_metadata/\",\n",
+    "    \"pipeline_tutorial:dataset_1p1_with_execution_metadata\",\n",
     "    \"0.0.1\",\n",
     "    description=\"A directory structure output from pipeline stage 1\",\n",
     "    old_location=\"/somewhere/on/machine/my-dataset/\",\n",
-    "    name=\"Dataset 1.1\",\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True,\n",
+    "    location_type=\"dummy\",\n",
     "    execution_name=\"my execution\",\n",
     "    execution_description=\"my execution description\"\n",
     ")\n",
@@ -183,7 +193,7 @@
     " \n",
     "\n",
     "\n",
-    "The DESC CO Group wants to enter this into the data registry, they would do it like so:"
+    "To enter this into the data registry, we would do the following:"
    ]
   },
   {
@@ -195,11 +205,6 @@
    },
    "outputs": [],
    "source": [
-    "from dataregistry import DataRegistry\n",
-    "\n",
-    "# Establish connection to database, setting a default owner and owner_type for all registered datasets in this instance.\n",
-    "datareg = DataRegistry(owner=\"DESC CO Group\", owner_type=\"group\")\n",
-    "\n",
     "# Create execution for first pipeline stage\n",
     "ex1_id = datareg.Registrar.execution.register(\n",
     "    \"pipeline-stage-1\"\n",
@@ -207,45 +212,45 @@
     "\n",
     "# Register datasets with first pipeline stage.\n",
     "dataset_id1, _ = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_1p1/\",\n",
+    "    \"pipeline_tutorial:dataset_1p1\",\n",
     "    \"0.0.1\",\n",
     "    execution_id=ex1_id,\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True\n",
+    "    location_type=\"dummy\"\n",
     ")\n",
     "\n",
     "dataset_id2, _ = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_1p2.db\",\n",
+    "    \"pipeline_tutorial:dataset_1p2.db\",\n",
     "    \"0.0.1\",\n",
     "    execution_id=ex1_id,\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True\n",
+    "    location_type=\"dummy\"\n",
    ")\n",
     "\n",
     "dataset_id3, _ = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_1p3.hdf5\",\n",
+    "    \"pipeline_tutorial:dataset_1p3.hdf5\",\n",
     "    \"0.0.1\",\n",
     "    execution_id=ex1_id,\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True\n",
+    "    location_type=\"dummy\"\n",
     ")\n",
     "\n",
     "# Register dataset and execution of second pipeline stage together\n",
     "dataset_id4, _ = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_2p1\",\n",
+    "    \"pipeline_tutorial:dataset_2p1\",\n",
     "    \"0.0.1\",\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True,\n",
+    "    location_type=\"dummy\",\n",
     "    input_datasets=[dataset_id1],\n",
     "    execution_name=\"pipeline-stage-2\"\n",
     ")\n",
     "\n",
     "# Register dataset and execution of third pipeline stage together\n",
     "dataset_id5, _ = datareg.Registrar.dataset.register(\n",
-    "    \"pipeline_tutorial/dataset_3p1\",\n",
+    "    \"pipeline_tutorial:dataset_3p1\",\n",
     "    \"0.0.1\",\n",
     "    is_overwritable=True,\n",
-    "    is_dummy=True,\n",
+    "    location_type=\"dummy\",\n",
     "    input_datasets=[dataset_id4],\n",
     "    execution_name=\"pipeline-stage-3\"\n",
     ")"
@@ -296,7 +301,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.10.12"
   }
  },
 "nbformat": 4,
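For reference, the chained registration pattern that the patched cells demonstrate generalizes to any number of pipeline stages. The sketch below is illustrative only and is not part of the notebook: it assumes the `tutorial_working` schema and the `location_type="dummy"` testing flag introduced in this patch, and the stage count and `chained_stage_*` dataset names are placeholders. It uses only the `register()` calls that appear in the diff.

# Illustrative sketch (not from the notebook): chaining N pipeline stages,
# where each stage's execution records the upstream dataset as its input.
from random import randint

from dataregistry import DataRegistry

# Random owner to avoid unique-constraint clashes, as in the patched cell
OWNER = "tutorial_" + str(randint(0, int(1e6)))
datareg = DataRegistry(schema="tutorial_working", owner=OWNER)

previous_output = None  # dataset_id produced by the upstream stage
for stage in range(1, 4):
    kwargs = {
        "is_overwritable": True,
        "location_type": "dummy",  # nothing is copied; database entry only
        "execution_name": f"pipeline-stage-{stage}",
    }
    if previous_output is not None:
        # Link provenance: this stage's execution consumed the upstream dataset
        kwargs["input_datasets"] = [previous_output]
    dataset_id, _ = datareg.Registrar.dataset.register(
        f"pipeline_tutorial:chained_stage_{stage}",  # placeholder name
        "0.0.1",
        **kwargs,
    )
    previous_output = dataset_id

Running this loop registers three datasets and three executions whose `input_datasets` links form a linear provenance chain, mirroring the three-stage example in the notebook's final code cell.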