From 77ac5cb733abc7d9cb560b389a5cf645d2aabb7d Mon Sep 17 00:00:00 2001 From: Matthias Schaub Date: Thu, 6 Feb 2025 11:45:37 +1300 Subject: [PATCH] test: psycopg vs asyncpg performance --- .../geodatabase/get_matched_roads.sql | 2 +- .../geodatabase/select_building_area.sql | 2 +- .../building_comparison/indicator.py | 20 +++++++++-- .../indicators/road_comparison/indicator.py | 24 +++++++++++-- poetry.lock | 34 ++++++++++++++----- pyproject.toml | 6 ++-- .../indicators/test_building_comparison.py | 32 +++++++++++++++++ .../indicators/test_road_comparison.py | 31 +++++++++++++++++ 8 files changed, 134 insertions(+), 17 deletions(-) diff --git a/ohsome_quality_api/geodatabase/get_matched_roads.sql b/ohsome_quality_api/geodatabase/get_matched_roads.sql index 97a4de180..44fee3d9b 100644 --- a/ohsome_quality_api/geodatabase/get_matched_roads.sql +++ b/ohsome_quality_api/geodatabase/get_matched_roads.sql @@ -1,7 +1,7 @@ WITH bpoly AS ( SELECT -- split mutlipolygon into list of polygons for more efficient processing - (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom + (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom ) SELECT SUM(cr.covered), diff --git a/ohsome_quality_api/geodatabase/select_building_area.sql b/ohsome_quality_api/geodatabase/select_building_area.sql index e37ca54b8..74c50c78c 100644 --- a/ohsome_quality_api/geodatabase/select_building_area.sql +++ b/ohsome_quality_api/geodatabase/select_building_area.sql @@ -1,7 +1,7 @@ WITH bpoly AS ( SELECT -- split mutlipolygon into list of polygons for more efficient processing - (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom + (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom ) SELECT SUM({table_name}.area) as area diff --git a/ohsome_quality_api/indicators/building_comparison/indicator.py b/ohsome_quality_api/indicators/building_comparison/indicator.py index 6cf5be9b1..d101aa3ba 100644 --- 
a/ohsome_quality_api/indicators/building_comparison/indicator.py +++ b/ohsome_quality_api/indicators/building_comparison/indicator.py @@ -277,7 +277,7 @@ def format_sources(self): # alru needs hashable type, therefore, use string instead of Feature -@alru_cache +# @alru_cache async def get_reference_building_area(feature_str: str, table_name: str) -> float: """Get the building area for a AoI from the EUBUCCO dataset.""" # TODO: https://github.com/GIScience/ohsome-quality-api/issues/746 @@ -295,11 +295,27 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> floa geom = geojson.dumps(feature.geometry) async with await psycopg.AsyncConnection.connect(dns) as con: async with con.cursor() as cur: - await cur.execute(query.format(table_name=table_name), (geom,)) + await cur.execute(query.format(table_name=table_name, geom=geom)) res = await cur.fetchone() return res[0] or 0.0 +async def get_reference_building_area_asyncpg( + feature_str: str, table_name: str +) -> float: + file_path = os.path.join(db_client.WORKING_DIR, "select_building_area.sql") + with open(file_path, "r") as file: + query = file.read() + feature = geojson.loads(feature_str) + geom = geojson.dumps(feature.geometry) + + from ohsome_quality_api.geodatabase.client import get_connection + + async with get_connection() as conn: + result = await conn.fetchrow(query.format(table_name=table_name, geom=geom)) + return result[0] or 0.0 + + def load_datasets_metadata() -> dict: file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml") with open(file_path, "r") as f: diff --git a/ohsome_quality_api/indicators/road_comparison/indicator.py b/ohsome_quality_api/indicators/road_comparison/indicator.py index 08270700b..4adc2c89b 100644 --- a/ohsome_quality_api/indicators/road_comparison/indicator.py +++ b/ohsome_quality_api/indicators/road_comparison/indicator.py @@ -267,7 +267,7 @@ def format_sources(self): # alru needs hashable type, therefore, use string instead of Feature 
-@alru_cache +# @alru_cache async def get_matched_roadlengths( feature_str: str, table_name: str, @@ -290,13 +290,31 @@ async def get_matched_roadlengths( await cur.execute( query.format( table_name=table_name, - ), - (geom,), + geom=geom, + ) ) res = await cur.fetchone() return res[0], res[1] +async def get_matched_roadlengths_asyncpg( + feature_str: str, + table_name: str, +) -> tuple[float, float]: + file_path = os.path.join(db_client.WORKING_DIR, "get_matched_roads.sql") + with open(file_path, "r") as file: + query = file.read() + feature = geojson.loads(feature_str) + geom = geojson.dumps(feature.geometry) + table_name = table_name.replace(" ", "_") + + from ohsome_quality_api.geodatabase.client import get_connection + + async with get_connection() as conn: + result = await conn.fetchrow(query.format(table_name=table_name, geom=geom)) + return result[0], result[1] + + def load_datasets_metadata() -> dict: file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml") with open(file_path, "r") as f: diff --git a/poetry.lock b/poetry.lock index cae00d089..a33d53df3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1474,13 +1474,13 @@ certifi = "*" [[package]] name = "pytest" -version = "7.4.4" +version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] [package.dependencies] @@ -1488,11 +1488,29 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = 
{version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" -tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3"}, + {file = "pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a"}, +] + +[package.dependencies] +pytest = ">=8.2,<9" [package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] [[package]] name = "pytest-cov" @@ -2150,4 +2168,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "1e0dae8aedea1d37eed68299069e9f99e3951f5c9ed42cd8e0e010ce6ad3952b" +content-hash = "9d6493fc494c0ecd09cff4aa806bac8689af9ee65a8318b0f0acf42d466bacf7" diff --git a/pyproject.toml b/pyproject.toml index 3309ac9a3..3f20c0443 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ requests = "^2.32.0" PyYAML = "^6.0" toml = "^0.10.2" httpx = "^0.23.0" -asyncpg = "^0.30" vcrpy = "^4.1.1" python-dateutil = "^2.8.2" scipy = "^1.9.3" @@ -46,10 +45,10 @@ plotly = "^5.16.1" psycopg = {extras = ["binary"], version = "^3.1"} async-lru = "^2.0.4" approvaltests = "^12.1.0" +asyncpg = "^0.30.0" [tool.poetry.dev-dependencies] pre-commit = "^3.2.1" -pytest = "^7.2.2" 
pytest-cov = "^4.0.0" pytest-mock = "^3.11.1" @@ -58,6 +57,8 @@ pytest-mock = "^3.11.1" [tool.poetry.group.dev.dependencies] ruff = "^0.7.3" +pytest = "^8.3.4" +pytest-asyncio = "^0.25.3" [build-system] requires = ["poetry-core"] @@ -83,3 +84,4 @@ select = [ [tool.pytest.ini_options] testpaths = ["tests"] filterwarnings = ["ignore::DeprecationWarning"] +addopts = "-s" # show print statements diff --git a/tests/integrationtests/indicators/test_building_comparison.py b/tests/integrationtests/indicators/test_building_comparison.py index 155b22695..819053027 100644 --- a/tests/integrationtests/indicators/test_building_comparison.py +++ b/tests/integrationtests/indicators/test_building_comparison.py @@ -364,3 +364,35 @@ def test_create_figure_building_area_zero( assert isinstance(indicator.result.figure, dict) assert indicator.result.figure["data"][0]["type"] == "pie" pgo.Figure(indicator.result.figure) + + +@pytest.mark.asyncio +async def test_compare_database_libraries_execution_time(feature_germany_berlin): + import time + import geojson + from ohsome_quality_api.indicators.building_comparison.indicator import ( + get_reference_building_area, + get_reference_building_area_asyncpg, + ) + + for dataset in ("eubucco", "microsoft_buildings"): + start_psycopg = time.perf_counter() + result_psycopg = await get_reference_building_area( + geojson.dumps(feature_germany_berlin), + dataset, + ) + end_psycopg = time.perf_counter() + time_psycopg = end_psycopg - start_psycopg + print(time_psycopg) # ~4-5 sec + + start_asyncpg = time.perf_counter() + result_asyncpg = await get_reference_building_area_asyncpg( + geojson.dumps(feature_germany_berlin), + dataset, + ) + end_asyncpg = time.perf_counter() + time_asyncpg = end_asyncpg - start_asyncpg + print(time_asyncpg) # ~4-5 sec + + assert result_psycopg == result_asyncpg + assert time_psycopg == pytest.approx(time_asyncpg, abs=1) # allow 1 seconds diff diff --git a/tests/integrationtests/indicators/test_road_comparison.py 
b/tests/integrationtests/indicators/test_road_comparison.py index 11e3e8233..a47c00b7c 100644 --- a/tests/integrationtests/indicators/test_road_comparison.py +++ b/tests/integrationtests/indicators/test_road_comparison.py @@ -244,3 +244,34 @@ def test_get_matched_roadlengths(): assert (1502620657, 1969546917) == asyncio.run( get_matched_roadlengths(json.dumps(polygon), "microsoft_roads_midpoint") ) + + +@pytest.mark.asyncio +async def test_compare_database_libraries_execution_time(feature_germany_berlin): + import time + import geojson + from ohsome_quality_api.indicators.road_comparison.indicator import ( + get_matched_roadlengths, + get_matched_roadlengths_asyncpg, + ) + + start_psycopg = time.perf_counter() + result_psycopg = await get_matched_roadlengths( + geojson.dumps(feature_germany_berlin), + "microsoft_roads_midpoint", + ) + end_psycopg = time.perf_counter() + time_psycopg = end_psycopg - start_psycopg + print(time_psycopg) # ~4-5 sec + + start_asyncpg = time.perf_counter() + result_asyncpg = await get_matched_roadlengths_asyncpg( + geojson.dumps(feature_germany_berlin), + "microsoft_roads_midpoint", + ) + end_asyncpg = time.perf_counter() + time_asyncpg = end_asyncpg - start_asyncpg + print(time_asyncpg) # ~4-5 sec + + assert result_psycopg == result_asyncpg + assert time_psycopg == pytest.approx(time_asyncpg, abs=1) # allow 1 seconds diff