# %% [markdown]
# # Development of highways over time
# In this notebook we demonstrate how to analyze and visualize the development of highways in OSM over time.
#
# These are the steps you see further down:
#
# * Set the connection parameters.
# * Prepare your input parameters, e.g. define area of interest and time interval.
# * **Download data** using DuckDB.
# * Create the mapping saturation **plot**.
# * Create a **Map** and an **interactive Slider** to filter the map data.

# %% [markdown]
# # Getting started
# Set connection params.

# %%
import os
from getpass import getpass

# Object-storage credentials must never be stored in the notebook itself.
# They are read from the environment (S3_USER / S3_PASSWORD); when a variable
# is missing or empty the user is prompted instead, and the password prompt
# does not echo the typed value.
# NOTE(review): a password was committed in a previous revision of this cell;
# treat it as leaked and rotate it on the storage backend.
s3_user = os.environ.get("S3_USER") or input("S3 user: ")
s3_password = os.environ.get("S3_PASSWORD") or getpass("S3 password: ")

# %% [markdown]
# Configure DuckDB.
# %%
import time

import duckdb

# In-process DuckDB connection. The thread count and memory cap below are
# sized for a large machine; lower them when running on a smaller one.
con = duckdb.connect(
    config={
        'threads': 32,
        'max_memory': '50GB'
    }
)
# The spatial extension provides st_read() and the ST_* functions used below.
con.install_extension("spatial")
con.load_extension("spatial")

# %% [markdown]
# Set connection to MinIO object storage.

# %%
def _sql_literal(value: str) -> str:
    """Return ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that the value cannot terminate the
    literal early when it is interpolated into a statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


# The credentials are interpolated into the statement text, so they are
# escaped first: a quote inside the user name or password would otherwise
# break (or change the meaning of) the CREATE SECRET statement.
# `s3_user` / `s3_password` come from the credentials cell above.
query = f"""
DROP SECRET IF EXISTS "__default_s3";
CREATE SECRET (
      TYPE S3,
      KEY_ID {_sql_literal(s3_user)},
      SECRET {_sql_literal(s3_password)},
      REGION 'eu-central-1',
      endpoint 'sotm2024.minio.heigit.org',
      use_ssl true,
      url_style 'path'
  );
"""
con.sql(query).show()

# %% [markdown]
# ## Prepare the input parameters for your analysis

# %%
# Set s3 path for parquet input data
parquet_data_path = "s3a://heigit-ohsome-sotm24/data/geo_sort_ext/contributions_germany/**"
# Alternative input path (presumably the non-Germany-only extract, needed for
# regions such as 'nairobi' — TODO confirm):
#parquet_data_path = "s3a://heigit-ohsome-sotm24/data/geo_sort_ext/contributions/**"


# Define location filter: region name -> (xmin, ymin, xmax, ymax) in lon/lat degrees
bboxes = {
    'heidelberg': (8.629761, 49.379556, 8.742371, 49.437890),
    'nairobi': (36.650938, -1.444471, 37.103887, -1.163522),
    'mannheim': (8.41416, 49.410362, 8.58999, 49.590489),
    'berlin': (13.088345, 52.338271, 13.761161, 52.675509)
}

selected_region = 'heidelberg'
xmin, ymin, xmax, ymax = bboxes[selected_region]
# The area-of-interest polygon is read from a GeoJSON file named after the
# region; it is expected next to the notebook (or under ../data, see below).
#area_of_interest_file =f"../data/{selected_region}.geojson"
area_of_interest_file =f"{selected_region}.geojson"

# Define geometry type filter
geometry_type = 'LineString'

# Define time range
min_timestamp = '2008-01-01'
max_timestamp = '2015-01-01'

# %% [markdown]
# ## Get the Data

# %%
# perf_counter() is a monotonic clock meant for measuring elapsed time.
start_time = time.perf_counter()

# Build the `osm_data` table: every highway LineString version ('latest' or
# 'history') that intersects the area of interest, clipped to that area.
# The cheap bbox comparison runs before the exact ST_Intersects test.
query = f"""
DROP TABLE IF EXISTS osm_data;
CREATE TABLE osm_data AS
(
SELECT
    a.osm_id,
    a.osm_version,
    a.status,
    a.valid_from,
    a.valid_to,
    a.tags,
    ST_Intersection(ST_GeomFromText(a.geometry), aoi.geom) as clipped_geometry,
    -- ST_Length_Spheroid expects [latitude, longitude] axis order, while the
    -- geometries here are lon/lat, so the coordinates are flipped for the
    -- length computation only (the stored geometry keeps lon/lat order).
    ST_Length_Spheroid(ST_FlipCoordinates(clipped_geometry)) / 1000 as length_km
FROM
    read_parquet('{parquet_data_path}', hive_partitioning=true) as a,
    st_read('{area_of_interest_file}') as aoi
WHERE 1=1
    and (status = 'latest' or status = 'history')
    and geometry_type = '{geometry_type}'
    and tags['highway'][1] is not null
    -- spatial filtering part
    and (a.bbox.xmax >= {xmin} AND a.bbox.xmin <= {xmax})
    and (a.bbox.ymax >= {ymin} AND a.bbox.ymin <= {ymax})
    and ST_Intersects(st_GeomFromText(a.geometry), aoi.geom)
)
;
"""
con.sql(query)

download_time = round(time.perf_counter() - start_time, 3)
print(f"download took {download_time} sec.")

# %% [markdown]
# ## Plot chart

# %%
# Build the `osm_stats` table: one row per (monthly snapshot, feature version
# valid at that snapshot).
# * RANGE() excludes its upper bound, so the last snapshot is the month
#   before `max_timestamp`.
# * row_number() is ordered explicitly so snapshot_index always follows
#   chronological order.
# * Validity is treated as the half-open interval [valid_from, valid_to), so
#   a snapshot that falls exactly on a version boundary matches only the
#   newer version (assumes consecutive versions share that boundary
#   timestamp — TODO confirm against the dataset).
query = f"""
DROP TABLE IF EXISTS osm_stats;
CREATE TABLE osm_stats AS
(
with
snapshots as (
    SELECT
        row_number() OVER (ORDER BY range) as snapshot_index,
        range AS datetime_key
    FROM RANGE(DATE '{min_timestamp}', DATE '{max_timestamp}', INTERVAL 1 MONTH)
)
SELECT
    snapshots.snapshot_index,
    snapshots.datetime_key,
    osm_id,
    valid_from,
    valid_to,
    length_km,
    clipped_geometry
FROM snapshots
JOIN osm_data on (
    snapshots.datetime_key >= osm_data.valid_from
    and
    snapshots.datetime_key < osm_data.valid_to
    )
ORDER BY snapshot_index
);
"""
con.sql(query)