Skip to content

Commit

Permalink
add ports to db
Browse files Browse the repository at this point in the history
  • Loading branch information
romainm13 committed Mar 11, 2024
1 parent 698432f commit 8afc886
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 7 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# APP BLOOM

/data/
data/*.csv
data/*.xlsx
!/data/README.md


Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ build:

launch-dev-db:
@docker compose -f docker/docker-compose-db.yaml up -d
@sleep 20
@sleep 10
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} alembic upgrade head
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_vessels_data.py

# Load the ports CSV into the DB. Path fixed to include the src/ prefix,
# consistent with the load-vessels and load-amp-data targets.
load-ports-data:
	$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_ports_data.py

load-amp-data:
$(BLOOM_DEV_DOCKER) --rm d4g/bloom:${VERSION} /venv/bin/python3 src/alembic/init_script/load_amp_data.py

Expand Down
11 changes: 7 additions & 4 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ To get them, contact BLOOM organisation
Don't share them in public repository when obtained

Filenames are fixed and are as below:
* chalutiers_pelagiques.csv
* spire_positions_subset.csv
* vessels_subset.csv
* zones_subset.csv

- `chalutiers_pelagiques.csv`
- `spire_positions_subset.csv`
- `vessels_subset.csv`
- `zones_subset.csv`
- `ports.csv`
- `ports_rad3000_res10.csv` (contains `geometry_buffer`, the polygon area of radius 3000 meters, resolution 10, around each port)
54 changes: 54 additions & 0 deletions data/utils/merge_filter_wpi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
- Clean "UN/LOCODE" column in wpi.csv and rename it to "locode"
- Join ports.csv with the WPI (World Port Index) data on the "locode" column (if a row in ports.csv has a no "locode" in the WPI data, it should be discarded)
- Filter the resulting dataframe to keep only big ports
Tidal Range (m): Les ports avec une grande amplitude de marée peuvent généralement accueillir de gros navires et sont souvent des ports importants.
Entrance Width (m): Une large entrée permet l'accès de grands navires.
Channel Depth (m): Une profondeur de chenal importante est nécessaire pour les grands navires à fort tirant d'eau.
Anchorage Depth (m): La profondeur à l'ancre indique si de grands navires peuvent mouiller en toute sécurité.
Cargo Pier Depth (m): La profondeur au quai de chargement est critique pour accueillir de grands navires de fret.
Oil Terminal Depth (m), Liquified Natural Gas Terminal Depth (m): La profondeur des terminaux pétroliers et gaziers indique la capacité du port à accueillir des pétroliers et des méthaniers, qui sont souvent de très grands navires.
Maximum Vessel Length (m), Maximum Vessel Beam (m), Maximum Vessel Draft (m): Ces mesures donnent une idée de la taille maximale des navires que le port peut accueillir.
Harbor Size: La taille du port peut indiquer sa capacité globale.
"""

import os
import pandas as pd

csv_input1 = os.path.join(os.path.dirname(__file__), "../ports_rad3000_res10.csv")
csv_input2 = os.path.join(os.path.dirname(__file__), "../wpi.csv")
csv_output = os.path.join(os.path.dirname(__file__), f"../result.csv")

df_ports = pd.read_csv(csv_input1, sep=";")
df_wpi = pd.read_csv(csv_input2, sep=";")

# rename "UN/LOCODE" to "locode"
df_wpi.rename(columns={"UN/LOCODE": "locode"}, inplace=True)

# drop rows with no "locode"
df_wpi = df_wpi.dropna(subset=["locode"])

#c lean "locode" column
df_wpi["locode"] = df_wpi["locode"].apply(lambda x: x.replace(" ", ""))

# join
print(df_ports.shape)
df = pd.merge(df_ports, df_wpi, on="locode", how="inner")
print(df.shape)

# filter
# on anchor depth
# depth = 1
# print(f"Before filter on depth = {depth}, shape = {df.shape}")
# df = df[df["Anchorage Depth (m)"] > depth]
# print(f"After filter on depth = {depth}, shape = {df.shape}")

# just keep columns for db
# url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer
columns = ["url", "country", "port", "locode", "latitude", "longitude", "geometry_point", "geometry_buffer"]
df = df[columns]

# save
df.to_csv(csv_output, sep=";", index=False)

35 changes: 35 additions & 0 deletions data/utils/string2float_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

# NOTE(review): input and output point at the same file — the script
# rewrites ../ports.csv in place; keep a backup if the original is needed.
csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv")
csv_output = os.path.join(os.path.dirname(__file__), "../ports.csv")


def convert_coords(coord):
    """
    Convert a coordinate string to a signed float.

    An "N" or "E" marker yields a positive value, an "S" or "W" marker a
    negative one; a plain numeric string is parsed as-is.
    """
    is_positive_hemisphere = "N" in coord or "E" in coord
    is_negative_hemisphere = "S" in coord or "W" in coord
    if is_positive_hemisphere:
        return float(coord.replace("N", "").replace("E", ""))
    if is_negative_hemisphere:
        return -1 * float(coord.replace("S", "").replace("W", ""))
    return float(coord)


df = pd.read_csv(csv_input, sep=";")

# Clean the latitude and longitude columns (e.g. "39.87N" -> 39.87).
df["latitude"] = df["latitude"].apply(convert_coords)
df["longitude"] = df["longitude"].apply(convert_coords)

# Create a point geometry column from the numeric coordinates (x=lon, y=lat).
df["geometry_point"] = [Point(xy) for xy in zip(df.longitude, df.latitude)]

# Create a GeoDataFrame. BUG FIX: the geometry column is named
# "geometry_point", not "geometry" — the previous geometry="geometry"
# argument referenced a column that does not exist.
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs="EPSG:4326")
print(gdf.head())

gdf.to_csv(csv_output, index=False, sep=";")
58 changes: 58 additions & 0 deletions data/utils/zone_around_point.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Another method with geodesic to have exactly radius_m meters around a port, no matter where on the globe
(polygons of the ports in the north seams to be flat, it's normal, it's the projection)
"""

import os
import geopandas as gpd
import pandas as pd
import pyproj
from shapely import wkt
from shapely.geometry import Polygon

radius_m = 3000 # Radius in kilometers
resolution = 10 # Number of points in the resulting polygon
crs_epsg = 4326 # CRS for WGS84

csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv")
csv_output = os.path.join(os.path.dirname(__file__), f"../ports_rad{radius_m}_res{resolution}.csv")

# Load CSV into DataFrame and convert WKT to geometry
df = pd.read_csv(csv_input, sep=";")
df["geometry_point"] = df["geometry_point"].apply(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs=crs_epsg)

# Function to create a geodesic buffer (circle polygon) around a point.
def geodesic_point_buffer(lat, lon, radius_m, resolution):
    """
    Build a geodesic circle of ``radius_m`` meters centered on (lat, lon).

    Parameters
    ----------
    lat, lon : float
        Latitude / longitude of the center point, in degrees (WGS84).
    radius_m : float
        Radius of the buffer in meters.
    resolution : int
        Number of vertices in the resulting polygon.

    Returns
    -------
    shapely.geometry.Polygon
    """
    geod = pyproj.Geod(ellps="WGS84")  # WGS84 ellipsoid for geodesic math
    # Evenly spaced azimuths. A float step (instead of the previous
    # ``range(0, 360, 360 // resolution)``) yields exactly ``resolution``
    # vertices even when ``resolution`` does not divide 360; values are
    # unchanged for the default resolution=10.
    angles = [i * 360.0 / resolution for i in range(resolution)]
    circle_points = []
    for angle in angles:
        # Point radius_m meters away from the center along this azimuth.
        lon2, lat2, _ = geod.fwd(lon, lat, angle, radius_m)
        circle_points.append((lon2, lat2))
    # Close the ring into a polygon (shapely closes it implicitly).
    return Polygon(circle_points)


# Apply the buffer function to create geodesic buffers
gdf["geometry_buffer"] = gdf.apply(
lambda row: geodesic_point_buffer(
float(row["latitude"]),
float(row["longitude"]),
radius_m,
resolution,
),
axis=1,
)

# Save the GeoDataFrame with buffers to a new CSV file
gdf.to_csv(csv_output, index=False, sep=";")
1 change: 0 additions & 1 deletion src/alembic/init_script/load_amp_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import os
from pathlib import Path

import geopandas as gpd
Expand Down
20 changes: 20 additions & 0 deletions src/alembic/init_script/load_ports_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import logging
import os
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine
from bloom.config import settings


# Log every SQL statement issued by SQLAlchemy (instead of engine echo).
logging.basicConfig()
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)

engine = create_engine(settings.db_url, echo=False)

# Ports CSV with precomputed 3000 m buffers — presumably produced by
# data/utils/zone_around_point.py (same naming pattern); verify.
ports_csv_path = Path(settings.data_folder).joinpath("./ports_rad3000_res10.csv")
ports_df = pd.read_csv(ports_csv_path, sep=";")

# Append every row into the existing "ports" table.
ports_df.to_sql("ports", engine, if_exists="append", index=False)
49 changes: 49 additions & 0 deletions src/alembic/versions/7962eee40abe_create_port_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""create ports table
Revision ID: 7962eee40abe
Revises: 961cee5426d6
Create Date: 2024-02-26 18:38:37.726130
"""
import sqlalchemy as sa
from geoalchemy2 import Geometry

from alembic import op

# revision identifiers, used by Alembic.
revision = "7962eee40abe"
down_revision = "961cee5426d6"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """
    Create the "ports" table, loaded from ports.csv.

    ports.csv has one row per port with the columns:
    url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer
    e.g. https://www.vesselfinder.com/ports/ALSAR001;Albania;Sarande;ALSAR;39.8701;20.0062;POINT (20.0062 39.8701)
    """
    op.create_table(
        "ports",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("country", sa.String(255), nullable=False),
        sa.Column("port", sa.String(255), nullable=False),  # port name
        sa.Column("url", sa.String(255), nullable=False),
        sa.Column("locode", sa.String(255), nullable=False),  # UN/LOCODE
        # NOTE(review): latitude/longitude are stored as strings —
        # presumably mirroring the CSV; confirm before doing numeric
        # comparisons on these columns in SQL.
        sa.Column("latitude", sa.String(255), nullable=False),
        sa.Column("longitude", sa.String(255), nullable=False),
        sa.Column(
            "geometry_point",
            Geometry(geometry_type="POINT", srid=4326),
            nullable=False,
        ),
        sa.Column(
            "geometry_buffer",
            Geometry(geometry_type="POLYGON", srid=4326),
            nullable=False,
        ),
    )


def downgrade() -> None:
    """Drop the "ports" table (reverts this migration)."""
    op.drop_table("ports")

0 comments on commit 8afc886

Please sign in to comment.