Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: psycopg vs asyncpg performance #876

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ohsome_quality_api/geodatabase/get_matched_roads.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WITH bpoly AS (
SELECT
-- split multipolygon into a list of polygons for more efficient processing
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
)
SELECT
SUM(cr.covered),
Expand Down
2 changes: 1 addition & 1 deletion ohsome_quality_api/geodatabase/select_building_area.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
WITH bpoly AS (
SELECT
-- split multipolygon into a list of polygons for more efficient processing
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON (%s), 4326))).geom AS geom
(ST_DUMP (ST_Setsrid (ST_GeomFromGeoJSON ('{geom}'), 4326))).geom AS geom
)
SELECT
SUM({table_name}.area) as area
Expand Down
20 changes: 18 additions & 2 deletions ohsome_quality_api/indicators/building_comparison/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def format_sources(self):


# alru needs hashable type, therefore, use string instead of Feature
@alru_cache
# @alru_cache
async def get_reference_building_area(feature_str: str, table_name: str) -> float:
"""Get the building area for a AoI from the EUBUCCO dataset."""
# TODO: https://github.com/GIScience/ohsome-quality-api/issues/746
Expand All @@ -295,11 +295,27 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> floa
geom = geojson.dumps(feature.geometry)
async with await psycopg.AsyncConnection.connect(dns) as con:
async with con.cursor() as cur:
await cur.execute(query.format(table_name=table_name), (geom,))
await cur.execute(query.format(table_name=table_name, geom=geom))
res = await cur.fetchone()
return res[0] or 0.0


async def get_reference_building_area_asyncpg(
    feature_str: str, table_name: str
) -> float:
    """Get the reference building area for an AoI via `get_connection`.

    Runs the `select_building_area.sql` template through the connection
    provided by `ohsome_quality_api.geodatabase.client.get_connection`
    (presumably asyncpg, going by the function name) so that it can be
    compared against the psycopg based `get_reference_building_area`.

    Args:
        feature_str: GeoJSON Feature serialized as a string.
        table_name: Value substituted for `{table_name}` in the SQL template.

    Returns:
        The first column of the fetched row, or 0.0 if that value is falsy
        (e.g. `None`).
    """
    file_path = os.path.join(db_client.WORKING_DIR, "select_building_area.sql")
    with open(file_path, "r") as file:
        query = file.read()
    feature = geojson.loads(feature_str)
    # The SQL template embeds the geometry inside a single-quoted literal
    # ('{geom}'). Double every single quote so that a quote inside the GeoJSON
    # cannot terminate the literal and break (or inject into) the statement.
    geom = geojson.dumps(feature.geometry).replace("'", "''")

    from ohsome_quality_api.geodatabase.client import get_connection

    async with get_connection() as conn:
        result = await conn.fetchrow(query.format(table_name=table_name, geom=geom))
    return result[0] or 0.0


def load_datasets_metadata() -> dict:
file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
with open(file_path, "r") as f:
Expand Down
24 changes: 21 additions & 3 deletions ohsome_quality_api/indicators/road_comparison/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def format_sources(self):


# alru needs hashable type, therefore, use string instead of Feature
@alru_cache
# @alru_cache
async def get_matched_roadlengths(
feature_str: str,
table_name: str,
Expand All @@ -290,13 +290,31 @@ async def get_matched_roadlengths(
await cur.execute(
query.format(
table_name=table_name,
),
(geom,),
geom=geom,
)
)
res = await cur.fetchone()
return res[0], res[1]


async def get_matched_roadlengths_async(
    feature_str: str,
    table_name: str,
) -> tuple[float, float]:
    """Get the first two result columns of `get_matched_roads.sql` for an AoI.

    Runs the SQL template through the connection provided by
    `ohsome_quality_api.geodatabase.client.get_connection` (presumably
    asyncpg) so that it can be compared against the psycopg based
    `get_matched_roadlengths`.

    Args:
        feature_str: GeoJSON Feature serialized as a string.
        table_name: Value substituted for `{table_name}` in the SQL template.
            Spaces are replaced by underscores before substitution.

    Returns:
        Tuple of the first and second column of the fetched row.
    """
    file_path = os.path.join(db_client.WORKING_DIR, "get_matched_roads.sql")
    with open(file_path, "r") as file:
        query = file.read()
    feature = geojson.loads(feature_str)
    # The SQL template embeds the geometry inside a single-quoted literal
    # ('{geom}'). Double every single quote so that a quote inside the GeoJSON
    # cannot terminate the literal and break (or inject into) the statement.
    geom = geojson.dumps(feature.geometry).replace("'", "''")
    table_name = table_name.replace(" ", "_")

    from ohsome_quality_api.geodatabase.client import get_connection

    async with get_connection() as conn:
        result = await conn.fetchrow(query.format(table_name=table_name, geom=geom))
    return result[0], result[1]


# Alias following the `*_asyncpg` naming of the building comparison indicator;
# the integration test imports the function under this name.
get_matched_roadlengths_asyncpg = get_matched_roadlengths_async


def load_datasets_metadata() -> dict:
file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
with open(file_path, "r") as f:
Expand Down
34 changes: 26 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ requests = "^2.32.0"
PyYAML = "^6.0"
toml = "^0.10.2"
httpx = "^0.23.0"
asyncpg = "^0.30"
vcrpy = "^4.1.1"
python-dateutil = "^2.8.2"
scipy = "^1.9.3"
Expand All @@ -46,10 +45,10 @@ plotly = "^5.16.1"
psycopg = {extras = ["binary"], version = "^3.1"}
async-lru = "^2.0.4"
approvaltests = "^12.1.0"
asyncpg = "^0.30.0"

[tool.poetry.dev-dependencies]
pre-commit = "^3.2.1"
pytest = "^7.2.2"
pytest-cov = "^4.0.0"
pytest-mock = "^3.11.1"

Expand All @@ -58,6 +57,8 @@ pytest-mock = "^3.11.1"

[tool.poetry.group.dev.dependencies]
ruff = "^0.7.3"
pytest = "^8.3.4"
pytest-asyncio = "^0.25.3"

[build-system]
requires = ["poetry-core"]
Expand All @@ -83,3 +84,4 @@ select = [
[tool.pytest.ini_options]
testpaths = ["tests"]
filterwarnings = ["ignore::DeprecationWarning"]
addopts = "-s" # show print statements
32 changes: 32 additions & 0 deletions tests/integrationtests/indicators/test_building_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,35 @@ def test_create_figure_building_area_zero(
assert isinstance(indicator.result.figure, dict)
assert indicator.result.figure["data"][0]["type"] == "pie"
pgo.Figure(indicator.result.figure)


@pytest.mark.asyncio
async def test_compare_database_libraries_execution_time(feature_germany_berlin):
    """Compare result and run time of the psycopg and the asyncpg code path."""
    import time

    import geojson

    from ohsome_quality_api.indicators.building_comparison.indicator import (
        get_reference_building_area,
        get_reference_building_area_asyncpg,
    )

    # Serialize once, outside the timed regions, so that both measurements
    # cover the same work.
    feature_str = geojson.dumps(feature_germany_berlin)

    for dataset in ("eubucco", "microsoft_buildings"):
        # perf_counter is monotonic and meant for measuring durations;
        # time.time() can jump when the system clock is adjusted.
        start_psycopg = time.perf_counter()
        result_psycopg = await get_reference_building_area(feature_str, dataset)
        time_psycopg = time.perf_counter() - start_psycopg
        print(time_psycopg)  # ~4-5 sec

        start_asyncpg = time.perf_counter()
        result_asyncpg = await get_reference_building_area_asyncpg(
            feature_str,
            dataset,
        )
        time_asyncpg = time.perf_counter() - start_asyncpg
        print(time_asyncpg)  # ~4-5 sec

        # Checked per dataset so that neither table is silently skipped.
        assert result_psycopg == result_asyncpg
        # allow a difference of 1 second
        assert time_psycopg == pytest.approx(time_asyncpg, abs=1)
31 changes: 31 additions & 0 deletions tests/integrationtests/indicators/test_road_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,34 @@ def test_get_matched_roadlengths():
assert (1502620657, 1969546917) == asyncio.run(
get_matched_roadlengths(json.dumps(polygon), "microsoft_roads_midpoint")
)


@pytest.mark.asyncio
async def test_compare_database_libraries_execution_time(feature_germany_berlin):
    """Compare result and run time of the psycopg and the asyncpg code path."""
    import time

    import geojson

    # The indicator module defines `get_matched_roadlengths_async`; import the
    # name that exists instead of the undefined `..._asyncpg` spelling.
    from ohsome_quality_api.indicators.road_comparison.indicator import (
        get_matched_roadlengths,
        get_matched_roadlengths_async,
    )

    # Serialize once, outside the timed regions, so that both measurements
    # cover the same work.
    feature_str = geojson.dumps(feature_germany_berlin)

    # perf_counter is monotonic and meant for measuring durations;
    # time.time() can jump when the system clock is adjusted.
    start_psycopg = time.perf_counter()
    result_psycopg = await get_matched_roadlengths(
        feature_str,
        "microsoft_roads_midpoint",
    )
    time_psycopg = time.perf_counter() - start_psycopg
    print(time_psycopg)  # ~4-5 sec

    start_asyncpg = time.perf_counter()
    result_asyncpg = await get_matched_roadlengths_async(
        feature_str,
        "microsoft_roads_midpoint",
    )
    time_asyncpg = time.perf_counter() - start_asyncpg
    print(time_asyncpg)  # ~4-5 sec

    assert result_psycopg == result_asyncpg
    # allow a difference of 1 second
    assert time_psycopg == pytest.approx(time_asyncpg, abs=1)