From 8afc886ec3efa9249e98c8fe23d9ee11ed72b2e7 Mon Sep 17 00:00:00 2001 From: romainm13 Date: Tue, 12 Mar 2024 00:28:56 +0100 Subject: [PATCH] add ports to db --- .gitignore | 3 +- Makefile | 5 +- data/README.md | 11 ++-- data/utils/merge_filter_wpi.py | 54 +++++++++++++++++ data/utils/string2float_coordinates.py | 35 +++++++++++ data/utils/zone_around_point.py | 58 +++++++++++++++++++ src/alembic/init_script/load_amp_data.py | 1 - src/alembic/init_script/load_ports_data.py | 20 +++++++ .../7962eee40abe_create_port_table.py | 49 ++++++++++++++++ 9 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 data/utils/merge_filter_wpi.py create mode 100644 data/utils/string2float_coordinates.py create mode 100644 data/utils/zone_around_point.py create mode 100644 src/alembic/init_script/load_ports_data.py create mode 100644 src/alembic/versions/7962eee40abe_create_port_table.py diff --git a/.gitignore b/.gitignore index 55af72a6..4adeaf7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # APP BLOOM -/data/ +data/*.csv +data/*.xlsx !/data/README.md diff --git a/Makefile b/Makefile index 0554c017..61628ca7 100644 --- a/Makefile +++ b/Makefile @@ -10,10 +10,13 @@ build: launch-dev-db: @docker compose -f docker/docker-compose-db.yaml up -d - @sleep 20 + @sleep 10 $(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} alembic upgrade head $(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_vessels_data.py +load-ports-data: + $(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_ports_data.py + load-amp-data: $(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_amp_data.py diff --git a/data/README.md b/data/README.md index 93672f03..317832d3 100644 --- a/data/README.md +++ b/data/README.md @@ -7,7 +7,10 @@ To get them, contact BLOOM organisation Don't share them in public repository when obtained Filenames are fixed and is as below: -* 
chalutiers_pelagiques.csv -* spire_positions_subset.csv -* vessels_subset.csv -* zones_subset.csv \ No newline at end of file + +- `chalutiers_pelagiques.csv` +- `spire_positions_subset.csv` +- `vessels_subset.csv` +- `zones_subset.csv` +- `ports.csv` +- `ports_rad3000_res10.csv` (contains `geometry_buffer`, the polygon area around each port, 3000 meters radius and resolution 10) diff --git a/data/utils/merge_filter_wpi.py b/data/utils/merge_filter_wpi.py new file mode 100644 index 00000000..17afa6fd --- /dev/null +++ b/data/utils/merge_filter_wpi.py @@ -0,0 +1,54 @@ +""" +- Clean "UN/LOCODE" column in wpi.csv and rename it to "locode" +- Join ports.csv with the WPI (World Port Index) data on the "locode" column (if a row in ports.csv has no "locode" in the WPI data, it should be discarded) +- Filter the resulting dataframe to keep only big ports + +Tidal Range (m): Les ports avec une grande amplitude de marée peuvent généralement accueillir de gros navires et sont souvent des ports importants. +Entrance Width (m): Une large entrée permet l'accès de grands navires. +Channel Depth (m): Une profondeur de chenal importante est nécessaire pour les grands navires à fort tirant d'eau. +Anchorage Depth (m): La profondeur à l'ancre indique si de grands navires peuvent mouiller en toute sécurité. +Cargo Pier Depth (m): La profondeur au quai de chargement est critique pour accueillir de grands navires de fret. +Oil Terminal Depth (m), Liquified Natural Gas Terminal Depth (m): La profondeur des terminaux pétroliers et gaziers indique la capacité du port à accueillir des pétroliers et des méthaniers, qui sont souvent de très grands navires. +Maximum Vessel Length (m), Maximum Vessel Beam (m), Maximum Vessel Draft (m): Ces mesures donnent une idée de la taille maximale des navires que le port peut accueillir. +Harbor Size: La taille du port peut indiquer sa capacité globale. 
+""" + +import os +import pandas as pd + +csv_input1 = os.path.join(os.path.dirname(__file__), "../ports_rad3000_res10.csv") +csv_input2 = os.path.join(os.path.dirname(__file__), "../wpi.csv") +csv_output = os.path.join(os.path.dirname(__file__), f"../result.csv") + +df_ports = pd.read_csv(csv_input1, sep=";") +df_wpi = pd.read_csv(csv_input2, sep=";") + +# rename "UN/LOCODE" to "locode" +df_wpi.rename(columns={"UN/LOCODE": "locode"}, inplace=True) + +# drop rows with no "locode" +df_wpi = df_wpi.dropna(subset=["locode"]) + +#c lean "locode" column +df_wpi["locode"] = df_wpi["locode"].apply(lambda x: x.replace(" ", "")) + +# join +print(df_ports.shape) +df = pd.merge(df_ports, df_wpi, on="locode", how="inner") +print(df.shape) + +# filter +# on anchor depth +# depth = 1 +# print(f"Before filter on depth = {depth}, shape = {df.shape}") +# df = df[df["Anchorage Depth (m)"] > depth] +# print(f"After filter on depth = {depth}, shape = {df.shape}") + +# just keep columns for db +# url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer +columns = ["url", "country", "port", "locode", "latitude", "longitude", "geometry_point", "geometry_buffer"] +df = df[columns] + +# save +df.to_csv(csv_output, sep=";", index=False) + \ No newline at end of file diff --git a/data/utils/string2float_coordinates.py b/data/utils/string2float_coordinates.py new file mode 100644 index 00000000..a631a8d4 --- /dev/null +++ b/data/utils/string2float_coordinates.py @@ -0,0 +1,35 @@ +import os +import geopandas as gpd +import pandas as pd +from shapely.geometry import Point + +csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv") +csv_output = os.path.join(os.path.dirname(__file__), "../ports.csv") + + +def convert_coords(coord): + """ + Convert a string coordinate to a float coordinate + """ + if "N" in coord or "E" in coord: + return float(coord.replace("N", "").replace("E", "")) + elif "S" in coord or "W" in coord: + return float(coord.replace("S", 
"").replace("W", "")) * -1 + else: + return float(coord) + + +df = pd.read_csv(csv_input, sep=";") + +# Clean the latitude and longitude columns +df["latitude"] = df["latitude"].apply(convert_coords) +df["longitude"] = df["longitude"].apply(convert_coords) + +# Create a geometry column +df["geometry_point"] = [Point(xy) for xy in zip(df.longitude, df.latitude)] + +# Create a GeoDataFrame +gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326") +print(gdf.head()) + +gdf.to_csv(csv_output, index=False, sep=";") diff --git a/data/utils/zone_around_point.py b/data/utils/zone_around_point.py new file mode 100644 index 00000000..77ab3400 --- /dev/null +++ b/data/utils/zone_around_point.py @@ -0,0 +1,58 @@ +""" +Another method with geodesic to have exactly radius_m meters around a port, no matter where on the globe +(polygons of the ports in the north seams to be flat, it's normal, it's the projection) +""" + +import os +import geopandas as gpd +import pandas as pd +import pyproj +from shapely import wkt +from shapely.geometry import Polygon + +radius_m = 3000 # Radius in kilometers +resolution = 10 # Number of points in the resulting polygon +crs_epsg = 4326 # CRS for WGS84 + +csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv") +csv_output = os.path.join(os.path.dirname(__file__), f"../ports_rad{radius_m}_res{resolution}.csv") + +# Load CSV into DataFrame and convert WKT to geometry +df = pd.read_csv(csv_input, sep=";") +df["geometry_point"] = df["geometry_point"].apply(wkt.loads) +gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs=crs_epsg) + +# Function to create geodesic buffer around a point +def geodesic_point_buffer(lat, lon, radius_m, resolution): + """ + Input + lat: latitude of the center point + lon: longitude of the center point + radius_m: radius of the buffer in meters + resolution: number of points in the resulting polygon + """ + geod = pyproj.Geod(ellps="WGS84") # Define the ellipsoid + # Create a circle in geodesic 
coordinates + angles = range(0, 360, 360 // resolution) + circle_points = [] + for angle in angles: + # Calculate the point on the circle for this angle + lon2, lat2, _ = geod.fwd(lon, lat, angle, radius_m) + circle_points.append((lon2, lat2)) + # Create a polygon from these points + return Polygon(circle_points) + + +# Apply the buffer function to create geodesic buffers +gdf["geometry_buffer"] = gdf.apply( + lambda row: geodesic_point_buffer( + float(row["latitude"]), + float(row["longitude"]), + radius_m, + resolution, + ), + axis=1, +) + +# Save the GeoDataFrame with buffers to a new CSV file +gdf.to_csv(csv_output, index=False, sep=";") \ No newline at end of file diff --git a/src/alembic/init_script/load_amp_data.py b/src/alembic/init_script/load_amp_data.py index 4bfdc532..0c186f50 100644 --- a/src/alembic/init_script/load_amp_data.py +++ b/src/alembic/init_script/load_amp_data.py @@ -1,5 +1,4 @@ import logging -import os from pathlib import Path import geopandas as gpd diff --git a/src/alembic/init_script/load_ports_data.py b/src/alembic/init_script/load_ports_data.py new file mode 100644 index 00000000..3b5f8e53 --- /dev/null +++ b/src/alembic/init_script/load_ports_data.py @@ -0,0 +1,20 @@ +import logging +import os +from pathlib import Path + +import pandas as pd +from sqlalchemy import create_engine +from bloom.config import settings + + +logging.basicConfig() +logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO) + +engine = create_engine(settings.db_url, echo=False) + +df = pd.read_csv( + Path(settings.data_folder).joinpath("./ports_rad3000_res10.csv"), + sep=";", +) + +df.to_sql("ports", engine, if_exists="append", index=False) diff --git a/src/alembic/versions/7962eee40abe_create_port_table.py b/src/alembic/versions/7962eee40abe_create_port_table.py new file mode 100644 index 00000000..a6585945 --- /dev/null +++ b/src/alembic/versions/7962eee40abe_create_port_table.py @@ -0,0 +1,49 @@ +"""create ports table + +Revision ID: 7962eee40abe 
+Revises: 961cee5426d6 +Create Date: 2024-02-26 18:38:37.726130 + +""" +import sqlalchemy as sa +from geoalchemy2 import Geometry + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "7962eee40abe" +down_revision = "961cee5426d6" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """ + ports.csv contains data for ports + url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer + https://www.vesselfinder.com/ports/ALSAR001;Albania;Sarande;ALSAR;39.8701;20.0062;POINT (20.0062 39.8701 + """ + op.create_table( + "ports", + sa.Column("id", sa.Integer, primary_key=True), + sa.Column("country", sa.String(255), nullable=False), + sa.Column("port", sa.String(255), nullable=False), + sa.Column("url", sa.String(255), nullable=False), + sa.Column("locode", sa.String(255), nullable=False), + sa.Column("latitude", sa.String(255), nullable=False), + sa.Column("longitude", sa.String(255), nullable=False), + sa.Column( + "geometry_point", + Geometry(geometry_type="POINT", srid=4326), + nullable=False, + ), + sa.Column( + "geometry_buffer", + Geometry(geometry_type="POLYGON", srid=4326), + nullable=False, + ), + ) + + +def downgrade() -> None: + op.drop_table("ports")