From 4fc11e95ea4033c8d49e47b52f9f3b77a5bfbaa4 Mon Sep 17 00:00:00 2001
From: h9b
We have optimized the structure for all tables in the geo_sort namespace to filter for status, geometry_type and location.\n", - " \n", - ":::" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d2125907-3028-4e76-ae26-39ac7adf0f94", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "download took 27.171 sec.\n" - ] - } - ], - "source": [ - "import time\n", - "\n", - "start_time = time.time()\n", - "\n", - "icebergtable.scan(\n", - " row_filter= (\n", - " f\"status = '{status}'\"\n", - " f\"and geometry_type = '{geometry_type}'\"\n", - " #f\"and (bbox.xmax >= {xmin} and bbox.xmin <= {xmax})\"\n", - " #f\"and (bbox.ymax >= {ymin} and bbox.ymin <= {ymax})\"\n", - " # optional timestamp filter\n", - " # f\"and valid_from >= '{min_timestamp}'\"\n", - " # f\"and valid_from < '{max_timestamp}'\"\n", - " ),\n", - " selected_fields=selected_fields,\n", - " # optional: limit the number of features downloadd \n", - " limit=25000\n", - ").to_duckdb('osm_data',connection=con)\n", - "\n", - "download_time = round(time.time() - start_time, 3)\n", - "print(f\"download took {download_time} sec.\")" - ] - }, - { - "cell_type": "markdown", - "id": "73b8b5e7-41b7-4c3f-a901-3c5403dc4150", - "metadata": {}, - "source": [ - "# DuckDB to GeoPackage" - ] - }, - { - "cell_type": "markdown", - "id": "3500cf69-363b-452e-be63-0f969b536e5b", - "metadata": {}, - "source": [ - "Show the structure of the data we have just downloaded." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d738f1c2-cbaf-49ce-9350-e6b28e2414b3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────────┬───────────────────────┬─────────┬─────────┬─────────┬─────────┐\n", - "│ column_name │ column_type │ null │ key │ default │ extra │\n", - "│ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │\n", - "├─────────────┼───────────────────────┼─────────┼─────────┼─────────┼─────────┤\n", - "│ user_id │ INTEGER │ YES │ NULL │ NULL │ NULL │\n", - "│ valid_from │ TIMESTAMP │ YES │ NULL │ NULL │ NULL │\n", - "│ osm_id │ VARCHAR │ YES │ NULL │ NULL │ NULL │\n", - "│ osm_version │ INTEGER │ YES │ NULL │ NULL │ NULL │\n", - "│ tags │ MAP(VARCHAR, VARCHAR) │ YES │ NULL │ NULL │ NULL │\n", - "│ geometry │ VARCHAR │ YES │ NULL │ NULL │ NULL │\n", - "└─────────────┴───────────────────────┴─────────┴─────────┴─────────┴─────────┘" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = \"\"\"\n", - "DESCRIBE\n", - "FROM osm_data;\n", - "\"\"\"\n", - "con.sql(query)" - ] - }, - { - "cell_type": "markdown", - "id": "60155988-a105-4a04-a305-b635e6b41ce7", - "metadata": {}, - "source": [ - "Inspect a few features." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7fc355bf-cf38-487d-9b80-5cc02b9edc31", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌─────────┬─────────────────────┬──────────────────┬─────────────┬──────────────────────┬──────────────────────────────┐\n", - "│ user_id │ valid_from │ osm_id │ osm_version │ tags │ geometry │\n", - "│ int32 │ timestamp │ varchar │ int32 │ map(varchar, varch… │ varchar │\n", - "├─────────┼─────────────────────┼──────────────────┼─────────────┼──────────────────────┼──────────────────────────────┤\n", - "│ 1462877 │ 2013-05-19 10:35:00 │ way/222126104 │ 1 │ {natural=bare_rock… │ POLYGON ((-179.92694939999… │\n", - "│ 1462877 │ 2013-05-19 10:35:28 │ way/222126268 │ 1 │ {natural=bare_rock… │ POLYGON ((-179.7100523 -85… │\n", - "│ 1462877 │ 2013-05-19 10:36:17 │ relation/2939698 │ 1 │ {natural=bare_rock… │ POLYGON ((-178.7729392 -85… │\n", - "│ 1462877 │ 2013-05-19 10:36:26 │ relation/2939760 │ 1 │ {natural=glacier, … │ POLYGON ((-178.7067439 -85… │\n", - "│ 1462877 │ 2013-05-19 10:36:18 │ relation/2939702 │ 1 │ {natural=bare_rock… │ POLYGON ((-178.6380738 -85… │\n", - "└─────────┴─────────────────────┴──────────────────┴─────────────┴──────────────────────┴──────────────────────────────┘" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = \"\"\"\n", - "SELECT *\n", - "FROM osm_data\n", - "LIMIT 5;\n", - "\"\"\"\n", - "con.sql(query)" - ] - }, - { - "cell_type": "markdown", - "id": "9a670374-387b-4848-a295-cb2f2edc33b7", - "metadata": {}, - "source": [ - "Count the number of features in the table when applying a more detailed tag filter.\n", - "\n", - "Furthermore, apply detailed geometry filter for Heidelberg boundary." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d070a68f-1615-4e5f-94ea-3cd837f2be4b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "┌──────────────┐\n", - "│ count_star() │\n", - "│ int64 │\n", - "├──────────────┤\n", - "│ 0 │\n", - "└──────────────┘" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = \"\"\"\n", - "SELECT count(*)\n", - "FROM\n", - " osm_data,\n", - " st_read('../data/Heidelberg.geojson') as heidelberg\n", - "WHERE 1=1\n", - " -- filter for all boundaries in OSM --> boundary=*\n", - " and list_contains(map_keys(tags), 'boundary')\n", - " -- intersect osm data with Heidelberg boundary\n", - " and ST_Intersects(st_GeomFromText(osm_data.geometry), heidelberg.geom)\n", - "\"\"\"\n", - "con.sql(query)" - ] - }, - { - "cell_type": "markdown", - "id": "7c406784-9c12-41f9-9f1e-87b9c571cdc3", - "metadata": {}, - "source": [ - "Export as GeoPackage via GeoPandas." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "3648c939-7813-4be3-b1d0-b51d3fbad4de", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "\n", - "query = f\"\"\"\n", - " SELECT *\n", - " FROM\n", - " osm_data,\n", - " st_read('../data/Heidelberg.geojson') as heidelberg\n", - " WHERE 1=1\n", - " -- filter for all boundaries in OSM --> boundary=*\n", - " and list_contains(map_keys(tags), 'boundary')\n", - " -- intersect osm data with Heidelberg boundary\n", - " and ST_Intersects(st_GeomFromText(osm_data.geometry), heidelberg.geom)\n", - "\"\"\"\n", - "df = con.sql(query).df()\n", - "\n", - "gdf = gpd.GeoDataFrame(\n", - " df,\n", - " geometry=gpd.GeoSeries.from_wkt(df['geometry'])\n", - ").set_crs('epsg:4326')\n", - "\n", - "output_filename = \"heidelberg_osm_data.gpkg\"\n", - "gdf.to_file(output_filename, driver='GPKG')" - ] - }, - { - "cell_type": "markdown", - "id": "50bf5649-9176-4e7f-83ac-752fc9e6faae", - "metadata": {}, - "source": [ - "# Work with the data in QGIS\n", - "Add your geopackage file in QGIS, e.g. via drag-and-drop or through file manager." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6087000-150b-4580-a3ed-a89236fee716", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}