From 77ac5cb733abc7d9cb560b389a5cf645d2aabb7d Mon Sep 17 00:00:00 2001
From: Matthias Schaub <matthias.schaub@heigit.org>
Date: Thu, 6 Feb 2025 11:45:37 +1300
Subject: [PATCH] test: psycopg vs asyncpg performance

---
 .../geodatabase/get_matched_roads.sql         |  2 +-
 .../geodatabase/select_building_area.sql      |  2 +-
 .../building_comparison/indicator.py          | 20 +++++++++--
 .../indicators/road_comparison/indicator.py   | 24 +++++++++++--
 poetry.lock                                   | 34 ++++++++++++++-----
 pyproject.toml                                |  6 ++--
 .../indicators/test_building_comparison.py    | 32 +++++++++++++++++
 .../indicators/test_road_comparison.py        | 31 +++++++++++++++++
 8 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/ohsome_quality_api/geodatabase/get_matched_roads.sql b/ohsome_quality_api/geodatabase/get_matched_roads.sql
index 97a4de180..44fee3d9b 100644
--- a/ohsome_quality_api/geodatabase/get_matched_roads.sql
+++ b/ohsome_quality_api/geodatabase/get_matched_roads.sql
@@ -1,7 +1,7 @@
 WITH bpoly AS (
     SELECT
         -- split mutlipolygon into list of polygons for more efficient processing
-        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
+        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
 )
 SELECT
     SUM(cr.covered),
diff --git a/ohsome_quality_api/geodatabase/select_building_area.sql b/ohsome_quality_api/geodatabase/select_building_area.sql
index e37ca54b8..74c50c78c 100644
--- a/ohsome_quality_api/geodatabase/select_building_area.sql
+++ b/ohsome_quality_api/geodatabase/select_building_area.sql
@@ -1,7 +1,7 @@
 WITH bpoly AS (
     SELECT
         -- split mutlipolygon into list of polygons for more efficient processing
-        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
+        (ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
 )
 SELECT
     SUM({table_name}.area) as area
diff --git a/ohsome_quality_api/indicators/building_comparison/indicator.py b/ohsome_quality_api/indicators/building_comparison/indicator.py
index 6cf5be9b1..d101aa3ba 100644
--- a/ohsome_quality_api/indicators/building_comparison/indicator.py
+++ b/ohsome_quality_api/indicators/building_comparison/indicator.py
@@ -277,7 +277,7 @@ def format_sources(self):
 
 
 # alru needs hashable type, therefore, use string instead of Feature
-@alru_cache
+# @alru_cache
 async def get_reference_building_area(feature_str: str, table_name: str) -> float:
     """Get the building area for a AoI from the EUBUCCO dataset."""
     # TODO: https://github.com/GIScience/ohsome-quality-api/issues/746
@@ -295,11 +295,27 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> floa
     geom = geojson.dumps(feature.geometry)
     async with await psycopg.AsyncConnection.connect(dns) as con:
         async with con.cursor() as cur:
-            await cur.execute(query.format(table_name=table_name), (geom,))
+            await cur.execute(query.format(table_name=table_name, geom=geom))
             res = await cur.fetchone()
     return res[0] or 0.0
 
 
+async def get_reference_building_area_asyncpg(
+    feature_str: str, table_name: str
+) -> float:
+    """Get the building area for an AoI via the geodatabase client connection."""
+    sql_path = os.path.join(db_client.WORKING_DIR, "select_building_area.sql")
+    with open(sql_path, "r") as sql_file:
+        template = sql_file.read()
+    geometry = geojson.dumps(geojson.loads(feature_str).geometry)
+    from ohsome_quality_api.geodatabase.client import get_connection
+    async with get_connection() as conn:
+        # NOTE(review): geom is formatted into the SQL text, not bound as a parameter
+        statement = template.format(table_name=table_name, geom=geometry)
+        row = await conn.fetchrow(statement)
+    return row[0] or 0.0
+
+
 def load_datasets_metadata() -> dict:
     file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
     with open(file_path, "r") as f:
diff --git a/ohsome_quality_api/indicators/road_comparison/indicator.py b/ohsome_quality_api/indicators/road_comparison/indicator.py
index 08270700b..4adc2c89b 100644
--- a/ohsome_quality_api/indicators/road_comparison/indicator.py
+++ b/ohsome_quality_api/indicators/road_comparison/indicator.py
@@ -267,7 +267,7 @@ def format_sources(self):
 
 
 # alru needs hashable type, therefore, use string instead of Feature
-@alru_cache
+# @alru_cache
 async def get_matched_roadlengths(
     feature_str: str,
     table_name: str,
@@ -290,13 +290,31 @@ async def get_matched_roadlengths(
             await cur.execute(
                 query.format(
                     table_name=table_name,
-                ),
-                (geom,),
+                    geom=geom,
+                )
             )
             res = await cur.fetchone()
     return res[0], res[1]
 
 
+async def get_matched_roadlengths_async(
+    feature_str: str,
+    table_name: str,
+) -> tuple[float, float]:
+    """Fetch the first two columns of the matched-roads query for an AoI."""
+    sql_path = os.path.join(db_client.WORKING_DIR, "get_matched_roads.sql")
+    with open(sql_path, "r") as sql_file:
+        template = sql_file.read()
+    geometry = geojson.dumps(geojson.loads(feature_str).geometry)
+    table = table_name.replace(" ", "_")
+    from ohsome_quality_api.geodatabase.client import get_connection
+
+    async with get_connection() as conn:
+        # NOTE(review): geom is formatted into the SQL text, not bound as a parameter
+        row = await conn.fetchrow(template.format(table_name=table, geom=geometry))
+    return row[0], row[1]
+
+
 def load_datasets_metadata() -> dict:
     file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
     with open(file_path, "r") as f:
diff --git a/poetry.lock b/poetry.lock
index cae00d089..a33d53df3 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1474,13 +1474,13 @@ certifi = "*"
 
 [[package]]
 name = "pytest"
-version = "7.4.4"
+version = "8.3.4"
 description = "pytest: simple powerful testing with Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
-    {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
+    {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"},
+    {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"},
 ]
 
 [package.dependencies]
@@ -1488,11 +1488,29 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""}
 exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
 iniconfig = "*"
 packaging = "*"
-pluggy = ">=0.12,<2.0"
-tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+pluggy = ">=1.5,<2"
+tomli = {version = ">=1", markers = "python_version < \"3.11\""}
+
+[package.extras]
+dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
+
+[[package]]
+name = "pytest-asyncio"
+version = "0.25.3"
+description = "Pytest support for asyncio"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3"},
+    {file = "pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a"},
+]
+
+[package.dependencies]
+pytest = ">=8.2,<9"
 
 [package.extras]
-testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
+testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
 
 [[package]]
 name = "pytest-cov"
@@ -2150,4 +2168,4 @@ propcache = ">=0.2.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "1e0dae8aedea1d37eed68299069e9f99e3951f5c9ed42cd8e0e010ce6ad3952b"
+content-hash = "9d6493fc494c0ecd09cff4aa806bac8689af9ee65a8318b0f0acf42d466bacf7"
diff --git a/pyproject.toml b/pyproject.toml
index 3309ac9a3..3f20c0443 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,6 @@ requests = "^2.32.0"
 PyYAML = "^6.0"
 toml = "^0.10.2"
 httpx = "^0.23.0"
-asyncpg = "^0.30"
 vcrpy = "^4.1.1"
 python-dateutil = "^2.8.2"
 scipy = "^1.9.3"
@@ -46,10 +45,10 @@ plotly = "^5.16.1"
 psycopg = {extras = ["binary"], version = "^3.1"}
 async-lru = "^2.0.4"
 approvaltests = "^12.1.0"
+asyncpg = "^0.30.0"
 
 [tool.poetry.dev-dependencies]
 pre-commit = "^3.2.1"
-pytest = "^7.2.2"
 pytest-cov = "^4.0.0"
 pytest-mock = "^3.11.1"
 
@@ -58,6 +57,8 @@ pytest-mock = "^3.11.1"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.7.3"
+pytest = "^8.3.4"
+pytest-asyncio = "^0.25.3"
 
 [build-system]
 requires = ["poetry-core"]
@@ -83,3 +84,4 @@ select = [
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 filterwarnings = ["ignore::DeprecationWarning"]
+addopts = "-s"  # show print statements
diff --git a/tests/integrationtests/indicators/test_building_comparison.py b/tests/integrationtests/indicators/test_building_comparison.py
index 155b22695..819053027 100644
--- a/tests/integrationtests/indicators/test_building_comparison.py
+++ b/tests/integrationtests/indicators/test_building_comparison.py
@@ -364,3 +364,35 @@ def test_create_figure_building_area_zero(
         assert isinstance(indicator.result.figure, dict)
         assert indicator.result.figure["data"][0]["type"] == "pie"
         pgo.Figure(indicator.result.figure)
+
+
+@pytest.mark.asyncio
+async def test_compare_database_libraries_execution_time(feature_germany_berlin):
+    """Compare results and run time of the psycopg and asyncpg building queries."""
+    import time
+
+    import geojson
+
+    from ohsome_quality_api.indicators.building_comparison.indicator import (
+        get_reference_building_area,
+        get_reference_building_area_asyncpg,
+    )
+
+    feature_str = geojson.dumps(feature_germany_berlin)
+    for dataset in ("eubucco", "microsoft_buildings"):
+        # perf_counter is monotonic; time.time() can jump with system clock changes
+        start_psycopg = time.perf_counter()
+        result_psycopg = await get_reference_building_area(feature_str, dataset)
+        time_psycopg = time.perf_counter() - start_psycopg
+        print(time_psycopg)  # ~4-5 sec
+
+        start_asyncpg = time.perf_counter()
+        result_asyncpg = await get_reference_building_area_asyncpg(
+            feature_str, dataset
+        )
+        time_asyncpg = time.perf_counter() - start_asyncpg
+        print(time_asyncpg)  # ~4-5 sec
+
+        assert result_psycopg == result_asyncpg
+        # allow 1 second difference
+        assert time_psycopg == pytest.approx(time_asyncpg, abs=1)
diff --git a/tests/integrationtests/indicators/test_road_comparison.py b/tests/integrationtests/indicators/test_road_comparison.py
index 11e3e8233..a47c00b7c 100644
--- a/tests/integrationtests/indicators/test_road_comparison.py
+++ b/tests/integrationtests/indicators/test_road_comparison.py
@@ -244,3 +244,34 @@ def test_get_matched_roadlengths():
     assert (1502620657, 1969546917) == asyncio.run(
         get_matched_roadlengths(json.dumps(polygon), "microsoft_roads_midpoint")
     )
+
+
+@pytest.mark.asyncio
+async def test_compare_database_libraries_execution_time(feature_germany_berlin):
+    """Compare results and run time of the psycopg and asyncpg road queries."""
+    import time
+
+    import geojson
+
+    # the asyncpg variant is defined as `get_matched_roadlengths_async`
+    from ohsome_quality_api.indicators.road_comparison.indicator import (
+        get_matched_roadlengths,
+        get_matched_roadlengths_async,
+    )
+
+    feature_str = geojson.dumps(feature_germany_berlin)
+    table_name = "microsoft_roads_midpoint"
+
+    # perf_counter is monotonic; time.time() can jump with system clock changes
+    start_psycopg = time.perf_counter()
+    result_psycopg = await get_matched_roadlengths(feature_str, table_name)
+    time_psycopg = time.perf_counter() - start_psycopg
+    print(time_psycopg)  # ~4-5 sec
+
+    start_asyncpg = time.perf_counter()
+    result_asyncpg = await get_matched_roadlengths_async(feature_str, table_name)
+    time_asyncpg = time.perf_counter() - start_asyncpg
+    print(time_asyncpg)  # ~4-5 sec
+
+    assert result_psycopg == result_asyncpg
+    assert time_psycopg == pytest.approx(time_asyncpg, abs=1)  # allow 1 second diff