Skip to content

Commit

Permalink
Update pipeline tutorial notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
stuartmcalpine committed Oct 24, 2024
1 parent 81949eb commit 7801e8d
Showing 1 changed file with 32 additions and 27 deletions.
59 changes: 32 additions & 27 deletions docs/source/tutorial_notebooks/pipelines.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,20 @@
},
"outputs": [],
"source": [
"# Come up with a random owner name to avoid clashes\n",
"from random import randint\n",
"OWNER = \"tutorial_\" + str(randint(0,int(1e6)))\n",
"\n",
"import dataregistry\n",
"print(\"Working with dataregistry version:\", dataregistry.__version__)"
"print(f\"Working with dataregistry version: {dataregistry.__version__} as random owner {OWNER}\")"
]
},
{
"cell_type": "markdown",
"id": "18da2a5b-c3e4-4528-9197-1316388ee397",
"metadata": {},
"source": [
"**Note** that running some of the cells below may fail, especially if they are run multiple times. This will most likely be caused by clashes with the unique constraints within the database (hopefully the error output is informative). If this happens, either: (1) re-run the cell above to generate a new random owner and then re-establish the database connection with it, or (2) manually change the value(s) of the conflicting database column(s) that are clashing during registration."
]
},
{
Expand Down Expand Up @@ -65,8 +77,8 @@
"source": [
"from dataregistry import DataRegistry\n",
"\n",
"# Establish connection to database (using defaults)\n",
"datareg = DataRegistry()\n",
"# Establish connection to the tutorial schema\n",
"datareg = DataRegistry(schema=\"tutorial_working\", owner=OWNER)\n",
"\n",
"# Register a new execution\n",
"ex1_id = datareg.Registrar.execution.register(\n",
Expand Down Expand Up @@ -98,14 +110,13 @@
"source": [
"# Register a dataset, choosing which execution it is associated with\n",
"dataset_id, execution_id = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_1p1/\",\n",
" \"pipeline_tutorial:dataset_0p1\",\n",
" \"0.0.1\",\n",
" description=\"A directory structure output from pipeline stage 1\",\n",
" old_location=\"/somewhere/on/machine/my-dataset/\",\n",
" execution_id=ex1_id,\n",
" name=\"Dataset 1.1\",\n",
" is_overwritable=True,\n",
" is_dummy=True\n",
" location_type=\"dummy\"\n",
")\n",
"\n",
"print(f\"Dataset {dataset_id} created, associated with execution {execution_id}\")"
Expand All @@ -118,7 +129,7 @@
"source": [
"This is largely the same as registering a dataset in the previous tutorial; however, we are now manually specifying the parent execution (`execution_id=ex1_id`).\n",
"\n",
"Note `is_dummy=True` is a flag to ignore the data at `old_location` (i.e., nothing is copied), and just create an entry in the database. This is a flag for testing purposes only."
"Note that `location_type=\"dummy\"` is an option to ignore the data at `old_location` (i.e., nothing is copied) and just create an entry in the database. This option is for testing purposes only."
]
},
{
Expand All @@ -142,13 +153,12 @@
"source": [
"# Create a dataset and execution at the same time\n",
"dataset_id, execution_id = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_1p1_with_execution_metadata/\",\n",
" \"pipeline_tutorial:dataset_1p1_with_execution_metadata\",\n",
" \"0.0.1\",\n",
" description=\"A directory structure output from pipeline stage 1\",\n",
" old_location=\"/somewhere/on/machine/my-dataset/\",\n",
" name=\"Dataset 1.1\",\n",
" is_overwritable=True,\n",
" is_dummy=True,\n",
" location_type=\"dummy\",\n",
" execution_name=\"my execution\",\n",
" execution_description=\"my execution description\"\n",
")\n",
Expand Down Expand Up @@ -183,7 +193,7 @@
" <img src=\"images/pipeline_example.png\" width=\"800\" style=\"float: left; margin-right: 10px;\">\n",
"</div>\n",
"\n",
"The DESC CO Group wants to enter this into the data registry, they would do it like so:"
"To enter this into the data registry, we would do the following:"
]
},
{
Expand All @@ -195,57 +205,52 @@
},
"outputs": [],
"source": [
"from dataregistry import DataRegistry\n",
"\n",
"# Establish connection to database, setting a default owner and owner_type for all registered datasets in this instance.\n",
"datareg = DataRegistry(owner=\"DESC CO Group\", owner_type=\"group\")\n",
"\n",
"# Create execution for first pipeline stage\n",
"ex1_id = datareg.Registrar.execution.register(\n",
" \"pipeline-stage-1\"\n",
")\n",
"\n",
"# Register datasets with first pipeline stage.\n",
"dataset_id1, _ = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_1p1/\",\n",
" \"pipeline_tutorial:dataset_1p1\",\n",
" \"0.0.1\",\n",
" execution_id=ex1_id,\n",
" is_overwritable=True,\n",
" is_dummy=True\n",
" location_type=\"dummy\"\n",
")\n",
"\n",
"dataset_id2, _ = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_1p2.db\",\n",
" \"pipeline_tutorial:dataset_1p2.db\",\n",
" \"0.0.1\",\n",
" execution_id=ex1_id,\n",
" is_overwritable=True,\n",
" is_dummy=True\n",
" location_type=\"dummy\"\n",
")\n",
"\n",
"dataset_id3, _ = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_1p3.hdf5\",\n",
" \"pipeline_tutorial:dataset_1p3.hdf5\",\n",
" \"0.0.1\",\n",
" execution_id=ex1_id,\n",
" is_overwritable=True,\n",
" is_dummy=True\n",
" location_type=\"dummy\"\n",
")\n",
"\n",
"# Register dataset and execution of second pipeline stage together\n",
"dataset_id4, _ = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_2p1\",\n",
" \"pipeline_tutorial:dataset_2p1\",\n",
" \"0.0.1\",\n",
" is_overwritable=True,\n",
" is_dummy=True,\n",
" location_type=\"dummy\",\n",
" input_datasets=[dataset_id1],\n",
" execution_name=\"pipeline-stage-2\"\n",
")\n",
"\n",
"# Register dataset and execution of third pipeline stage together\n",
"dataset_id5, _ = datareg.Registrar.dataset.register(\n",
" \"pipeline_tutorial/dataset_3p1\",\n",
" \"pipeline_tutorial:dataset_3p1\",\n",
" \"0.0.1\",\n",
" is_overwritable=True,\n",
" is_dummy=True,\n",
" location_type=\"dummy\",\n",
" input_datasets=[dataset_id4],\n",
" execution_name=\"pipeline-stage-3\"\n",
")"
Expand Down Expand Up @@ -296,7 +301,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 7801e8d

Please sign in to comment.