Skip to content

Commit

Permalink
add ports to db
Browse files Browse the repository at this point in the history
  • Loading branch information
romainm13 committed Mar 11, 2024
1 parent 698432f commit 8afc886
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# APP BLOOM

/data/
data/*.csv
data/*.xlsx
!/data/README.md


Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ build:

launch-dev-db:
@docker compose -f docker/docker-compose-db.yaml up -d
@sleep 20
@sleep 10
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} alembic upgrade head
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_vessels_data.py

# Load the ports CSV into the DB. Path fixed to include the src/ prefix,
# consistent with the load-vessels and load-amp-data targets.
load-ports-data:
	$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_ports_data.py

load-amp-data:
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_amp_data.py

Expand Down
11 changes: 7 additions & 4 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ To get them, contact BLOOM organisation
Don't share them in public repository when obtained

Filenames are fixed and are as below:
* chalutiers_pelagiques.csv
* spire_positions_subset.csv
* vessels_subset.csv
* zones_subset.csv

- `chalutiers_pelagiques.csv`
- `spire_positions_subset.csv`
- `vessels_subset.csv`
- `zones_subset.csv`
- `ports.csv`
- `ports_rad3000_res10.csv` (contains `geometry_buffer`, the polygon area of radius 3000 meters, resolution 10, around each port)
54 changes: 54 additions & 0 deletions data/utils/merge_filter_wpi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
- Clean "UN/LOCODE" column in wpi.csv and rename it to "locode"
- Join ports.csv with the WPI (World Port Index) data on the "locode" column (if a row in ports.csv has a no "locode" in the WPI data, it should be discarded)
- Filter the resulting dataframe to keep only big ports
Tidal Range (m): Les ports avec une grande amplitude de marée peuvent généralement accueillir de gros navires et sont souvent des ports importants.
Entrance Width (m): Une large entrée permet l'accès de grands navires.
Channel Depth (m): Une profondeur de chenal importante est nécessaire pour les grands navires à fort tirant d'eau.
Anchorage Depth (m): La profondeur à l'ancre indique si de grands navires peuvent mouiller en toute sécurité.
Cargo Pier Depth (m): La profondeur au quai de chargement est critique pour accueillir de grands navires de fret.
Oil Terminal Depth (m), Liquified Natural Gas Terminal Depth (m): La profondeur des terminaux pétroliers et gaziers indique la capacité du port à accueillir des pétroliers et des méthaniers, qui sont souvent de très grands navires.
Maximum Vessel Length (m), Maximum Vessel Beam (m), Maximum Vessel Draft (m): Ces mesures donnent une idée de la taille maximale des navires que le port peut accueillir.
Harbor Size: La taille du port peut indiquer sa capacité globale.
"""

import os
import pandas as pd

csv_input1 = os.path.join(os.path.dirname(__file__), "../ports_rad3000_res10.csv")
csv_input2 = os.path.join(os.path.dirname(__file__), "../wpi.csv")
csv_output = os.path.join(os.path.dirname(__file__), f"../result.csv")

df_ports = pd.read_csv(csv_input1, sep=";")
df_wpi = pd.read_csv(csv_input2, sep=";")

# rename "UN/LOCODE" to "locode"
df_wpi.rename(columns={"UN/LOCODE": "locode"}, inplace=True)

# drop rows with no "locode"
df_wpi = df_wpi.dropna(subset=["locode"])

#c lean "locode" column
df_wpi["locode"] = df_wpi["locode"].apply(lambda x: x.replace(" ", ""))

# join
print(df_ports.shape)
df = pd.merge(df_ports, df_wpi, on="locode", how="inner")
print(df.shape)

# filter
# on anchor depth
# depth = 1
# print(f"Before filter on depth = {depth}, shape = {df.shape}")
# df = df[df["Anchorage Depth (m)"] > depth]
# print(f"After filter on depth = {depth}, shape = {df.shape}")

# just keep columns for db
# url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer
columns = ["url", "country", "port", "locode", "latitude", "longitude", "geometry_point", "geometry_buffer"]
df = df[columns]

# save
df.to_csv(csv_output, sep=";", index=False)

35 changes: 35 additions & 0 deletions data/utils/string2float_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

# NOTE(review): input and output point at the same file — the script
# rewrites ../ports.csv in place; keep a backup if the original is needed.
csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv")
csv_output = os.path.join(os.path.dirname(__file__), "../ports.csv")


def convert_coords(coord):
    """
    Convert a coordinate string to a signed float.

    An "N" or "E" marker yields a positive value, an "S" or "W" marker a
    negative one; a plain numeric string is parsed as-is.
    """
    is_positive_hemisphere = "N" in coord or "E" in coord
    is_negative_hemisphere = "S" in coord or "W" in coord
    if is_positive_hemisphere:
        return float(coord.replace("N", "").replace("E", ""))
    if is_negative_hemisphere:
        return -1 * float(coord.replace("S", "").replace("W", ""))
    return float(coord)


df = pd.read_csv(csv_input, sep=";")

# Clean the latitude and longitude columns (e.g. "39.87N" -> 39.87).
df["latitude"] = df["latitude"].apply(convert_coords)
df["longitude"] = df["longitude"].apply(convert_coords)

# Create a point geometry column from the numeric coordinates (x=lon, y=lat).
df["geometry_point"] = [Point(xy) for xy in zip(df.longitude, df.latitude)]

# Create a GeoDataFrame. BUG FIX: the geometry column is named
# "geometry_point", not "geometry" — the previous geometry="geometry"
# argument referenced a column that does not exist.
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs="EPSG:4326")
print(gdf.head())

gdf.to_csv(csv_output, index=False, sep=";")
58 changes: 58 additions & 0 deletions data/utils/zone_around_point.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Another method with geodesic to have exactly radius_m meters around a port, no matter where on the globe
(polygons of the ports in the north seams to be flat, it's normal, it's the projection)
"""

import os
import geopandas as gpd
import pandas as pd
import pyproj
from shapely import wkt
from shapely.geometry import Polygon

radius_m = 3000 # Radius in kilometers
resolution = 10 # Number of points in the resulting polygon
crs_epsg = 4326 # CRS for WGS84

csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv")
csv_output = os.path.join(os.path.dirname(__file__), f"../ports_rad{radius_m}_res{resolution}.csv")

# Load CSV into DataFrame and convert WKT to geometry
df = pd.read_csv(csv_input, sep=";")
df["geometry_point"] = df["geometry_point"].apply(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs=crs_epsg)

# Function to create a geodesic buffer (circle polygon) around a point.
def geodesic_point_buffer(lat, lon, radius_m, resolution):
    """
    Build a geodesic circle of ``radius_m`` meters centered on (lat, lon).

    Parameters
    ----------
    lat, lon : float
        Latitude / longitude of the center point, in degrees (WGS84).
    radius_m : float
        Radius of the buffer in meters.
    resolution : int
        Number of vertices in the resulting polygon.

    Returns
    -------
    shapely.geometry.Polygon
    """
    geod = pyproj.Geod(ellps="WGS84")  # WGS84 ellipsoid for geodesic math
    # Evenly spaced azimuths. A float step (instead of the previous
    # ``range(0, 360, 360 // resolution)``) yields exactly ``resolution``
    # vertices even when ``resolution`` does not divide 360; values are
    # unchanged for the default resolution=10.
    angles = [i * 360.0 / resolution for i in range(resolution)]
    circle_points = []
    for angle in angles:
        # Point radius_m meters away from the center along this azimuth.
        lon2, lat2, _ = geod.fwd(lon, lat, angle, radius_m)
        circle_points.append((lon2, lat2))
    # Close the ring into a polygon (shapely closes it implicitly).
    return Polygon(circle_points)


# Apply the buffer function to create geodesic buffers
gdf["geometry_buffer"] = gdf.apply(
lambda row: geodesic_point_buffer(
float(row["latitude"]),
float(row["longitude"]),
radius_m,
resolution,
),
axis=1,
)

# Save the GeoDataFrame with buffers to a new CSV file
gdf.to_csv(csv_output, index=False, sep=";")
1 change: 0 additions & 1 deletion src/alembic/init_script/load_amp_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import os
from pathlib import Path

import geopandas as gpd
Expand Down
20 changes: 20 additions & 0 deletions src/alembic/init_script/load_ports_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import logging
import os
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine
from bloom.config import settings


# Log every SQL statement issued by SQLAlchemy (instead of engine echo).
logging.basicConfig()
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)

engine = create_engine(settings.db_url, echo=False)

# Ports CSV with precomputed 3000 m buffers — presumably produced by
# data/utils/zone_around_point.py (same naming pattern); verify.
ports_csv_path = Path(settings.data_folder).joinpath("./ports_rad3000_res10.csv")
ports_df = pd.read_csv(ports_csv_path, sep=";")

# Append every row into the existing "ports" table.
ports_df.to_sql("ports", engine, if_exists="append", index=False)
49 changes: 49 additions & 0 deletions src/alembic/versions/7962eee40abe_create_port_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""create ports table
Revision ID: 7962eee40abe
Revises: 961cee5426d6
Create Date: 2024-02-26 18:38:37.726130
"""
import sqlalchemy as sa
from geoalchemy2 import Geometry

from alembic import op

# revision identifiers, used by Alembic.
revision = "7962eee40abe"
down_revision = "961cee5426d6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """
    Create the "ports" table, loaded from ports.csv.

    ports.csv has one row per port with the columns:
    url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer
    e.g. https://www.vesselfinder.com/ports/ALSAR001;Albania;Sarande;ALSAR;39.8701;20.0062;POINT (20.0062 39.8701)
    """
    op.create_table(
        "ports",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("country", sa.String(255), nullable=False),
        sa.Column("port", sa.String(255), nullable=False),  # port name
        sa.Column("url", sa.String(255), nullable=False),
        sa.Column("locode", sa.String(255), nullable=False),  # UN/LOCODE
        # NOTE(review): latitude/longitude are stored as strings —
        # presumably mirroring the CSV; confirm before doing numeric
        # comparisons on these columns in SQL.
        sa.Column("latitude", sa.String(255), nullable=False),
        sa.Column("longitude", sa.String(255), nullable=False),
        sa.Column(
            "geometry_point",
            Geometry(geometry_type="POINT", srid=4326),
            nullable=False,
        ),
        sa.Column(
            "geometry_buffer",
            Geometry(geometry_type="POLYGON", srid=4326),
            nullable=False,
        ),
    )


def downgrade() -> None:
    """Drop the "ports" table (reverts this migration)."""
    op.drop_table("ports")

0 comments on commit 8afc886

Please sign in to comment.