-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
229 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
# APP BLOOM | ||
|
||
/data/ | ||
data/*.csv | ||
data/*.xlsx | ||
!/data/README.md | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
""" | ||
- Clean "UN/LOCODE" column in wpi.csv and rename it to "locode" | ||
- Join ports.csv with the WPI (World Port Index) data on the "locode" column (a row in ports.csv whose "locode" is not found in the WPI data is discarded) | ||
- Filter the resulting dataframe to keep only big ports | ||
Tidal Range (m): Ports with a large tidal range can generally accommodate large vessels and are often major ports. | ||
Entrance Width (m): A wide entrance gives large vessels access to the port. | ||
Channel Depth (m): A deep channel is required for large, deep-draft vessels. | ||
Anchorage Depth (m): The depth at anchorage indicates whether large vessels can anchor safely. | ||
Cargo Pier Depth (m): The depth at the cargo pier is critical for accommodating large cargo vessels. | ||
Oil Terminal Depth (m), Liquified Natural Gas Terminal Depth (m): The depth of the oil and gas terminals indicates the port's capacity to receive oil tankers and LNG carriers, which are often very large vessels. | ||
Maximum Vessel Length (m), Maximum Vessel Beam (m), Maximum Vessel Draft (m): These measurements give an idea of the maximum size of the vessels the port can accommodate. | ||
Harbor Size: The size of the harbor can indicate its overall capacity. | ||
""" | ||
|
||
import os | ||
import pandas as pd | ||
|
||
# Input/output files live one directory above this script.
csv_input1 = os.path.join(os.path.dirname(__file__), "../ports_rad3000_res10.csv")
csv_input2 = os.path.join(os.path.dirname(__file__), "../wpi.csv")
csv_output = os.path.join(os.path.dirname(__file__), "../result.csv")

df_ports = pd.read_csv(csv_input1, sep=";")
df_wpi = pd.read_csv(csv_input2, sep=";")

# Rename "UN/LOCODE" to "locode" so both frames share the join key name.
df_wpi = df_wpi.rename(columns={"UN/LOCODE": "locode"})

# Drop WPI rows with no "locode". The explicit copy makes the column
# assignment below write to an independent frame instead of a possible view
# (avoids pandas' SettingWithCopyWarning).
df_wpi = df_wpi.dropna(subset=["locode"]).copy()

# Clean the "locode" column: remove the spaces inside the code so it can be
# compared with the "locode" values of the ports file. The cast to str keeps
# a non-string cell from breaking the whole run.
df_wpi["locode"] = df_wpi["locode"].astype(str).str.replace(" ", "", regex=False)

# The WPI may list the same locode more than once; keep one row per locode so
# the inner join below cannot duplicate a port in the output.
df_wpi = df_wpi.drop_duplicates(subset=["locode"])

# Join: an inner merge discards ports whose locode is absent from the WPI.
print(df_ports.shape)
df = pd.merge(df_ports, df_wpi, on="locode", how="inner")
print(df.shape)

# Filter (not enabled yet)
# on anchor depth
# depth = 1
# print(f"Before filter on depth = {depth}, shape = {df.shape}")
# df = df[df["Anchorage Depth (m)"] > depth]
# print(f"After filter on depth = {depth}, shape = {df.shape}")

# Keep only the columns stored in the database:
# url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer
columns = [
    "url",
    "country",
    "port",
    "locode",
    "latitude",
    "longitude",
    "geometry_point",
    "geometry_buffer",
]
df = df[columns]

# Save the result as a ";"-separated CSV without the index column.
df.to_csv(csv_output, sep=";", index=False)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os | ||
import geopandas as gpd | ||
import pandas as pd | ||
from shapely.geometry import Point | ||
|
||
csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv") | ||
csv_output = os.path.join(os.path.dirname(__file__), "../ports.csv") | ||
|
||
|
||
def convert_coords(coord):
    """
    Convert a coordinate to a signed float.

    A string containing "N" or "E" is returned as a positive value, a string
    containing "S" or "W" as a negative value, and any other string is parsed
    as a plain number. Values that are not strings (for example floats read
    back from an already converted CSV) are simply cast to float instead of
    raising a TypeError on the ``in`` test.

    Raises ValueError if the remaining text is not a valid number.
    """
    # Already numeric: nothing to strip, just normalise the type.
    if not isinstance(coord, str):
        return float(coord)
    # Northern / eastern hemisphere -> positive value.
    if "N" in coord or "E" in coord:
        return float(coord.replace("N", "").replace("E", ""))
    # Southern / western hemisphere -> negative value.
    if "S" in coord or "W" in coord:
        return -float(coord.replace("S", "").replace("W", ""))
    return float(coord)
|
||
|
||
# Load the ";"-separated ports file.
df = pd.read_csv(csv_input, sep=";")

# Clean the latitude and longitude columns (hemisphere letters -> signed floats)
df["latitude"] = df["latitude"].apply(convert_coords)
df["longitude"] = df["longitude"].apply(convert_coords)

# Create the point geometry column; shapely points are (x, y) = (longitude, latitude)
df["geometry_point"] = [
    Point(lon, lat) for lon, lat in zip(df["longitude"], df["latitude"])
]

# Create a GeoDataFrame. The geometry column must be the one created above
# ("geometry_point"); there is no column named "geometry" in this frame.
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs="EPSG:4326")
print(gdf.head())

# Write the result back as a ";"-separated CSV without the index column.
gdf.to_csv(csv_output, index=False, sep=";")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
""" | ||
Another method with geodesic to have exactly radius_m meters around a port, no matter where on the globe | ||
(polygons of ports in the far north seem flattened; this is normal, it is an effect of the map projection) | ||
""" | ||
|
||
import os | ||
import geopandas as gpd | ||
import pandas as pd | ||
import pyproj | ||
from shapely import wkt | ||
from shapely.geometry import Polygon | ||
|
||
radius_m = 3000  # Buffer radius around each port, in meters (not kilometers)
resolution = 10  # Number of points in the resulting polygon
crs_epsg = 4326  # EPSG code of the WGS84 CRS
|
||
csv_input = os.path.join(os.path.dirname(__file__), "../ports.csv") | ||
csv_output = os.path.join(os.path.dirname(__file__), f"../ports_rad{radius_m}_res{resolution}.csv") | ||
|
||
# Load CSV into DataFrame and convert WKT to geometry | ||
df = pd.read_csv(csv_input, sep=";") | ||
df["geometry_point"] = df["geometry_point"].apply(wkt.loads) | ||
gdf = gpd.GeoDataFrame(df, geometry="geometry_point", crs=crs_epsg) | ||
|
||
# Function to create geodesic buffer around a point | ||
def geodesic_point_buffer(lat, lon, radius_m, resolution):
    """
    Build a polygon approximating a geodesic circle around a point.

    Input
    lat: latitude of the center point
    lon: longitude of the center point
    radius_m: radius of the buffer in meters
    resolution: number of points in the resulting polygon (integer, at least 3)

    Returns a shapely Polygon whose vertices are (longitude, latitude) pairs
    computed on the WGS84 ellipsoid.

    Raises ValueError if resolution is smaller than 3.
    """
    if resolution < 3:
        raise ValueError("resolution must be at least 3 to build a polygon")
    geod = pyproj.Geod(ellps="WGS84")  # Define the ellipsoid
    # Spread exactly `resolution` azimuths evenly over the full circle. A
    # float step is used because an integer step (360 // resolution) gives
    # the wrong number of unevenly spaced points when resolution does not
    # divide 360, and a zero step when resolution is larger than 360.
    step = 360 / resolution
    circle_points = []
    for index in range(resolution):
        # Point reached from the center along this azimuth after radius_m meters
        lon2, lat2, _ = geod.fwd(lon, lat, index * step, radius_m)
        circle_points.append((lon2, lat2))
    # Create a polygon from these points
    return Polygon(circle_points)
|
||
|
||
# Apply the buffer function to create geodesic buffers | ||
gdf["geometry_buffer"] = gdf.apply( | ||
lambda row: geodesic_point_buffer( | ||
float(row["latitude"]), | ||
float(row["longitude"]), | ||
radius_m, | ||
resolution, | ||
), | ||
axis=1, | ||
) | ||
|
||
# Save the GeoDataFrame with buffers to a new CSV file | ||
gdf.to_csv(csv_output, index=False, sep=";") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
import logging | ||
import os | ||
from pathlib import Path | ||
|
||
import geopandas as gpd | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import logging | ||
import os | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
from sqlalchemy import create_engine | ||
from bloom.config import settings | ||
|
||
|
||
logging.basicConfig() | ||
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO) | ||
|
||
engine = create_engine(settings.db_url, echo=False) | ||
|
||
df = pd.read_csv( | ||
Path(settings.data_folder).joinpath("./ports_rad3000_res10.csv"), | ||
sep=";", | ||
) | ||
|
||
df.to_sql("ports", engine, if_exists="append", index=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""create ports table | ||
Revision ID: 7962eee40abe | ||
Revises: 961cee5426d6 | ||
Create Date: 2024-02-26 18:38:37.726130 | ||
""" | ||
import sqlalchemy as sa | ||
from geoalchemy2 import Geometry | ||
|
||
from alembic import op | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "7962eee40abe" | ||
down_revision = "961cee5426d6" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade() -> None: | ||
""" | ||
ports.csv contains data for ports | ||
url;country;port;locode;latitude;longitude;geometry_point;geometry_buffer | ||
https://www.vesselfinder.com/ports/ALSAR001;Albania;Sarande;ALSAR;39.8701;20.0062;POINT (20.0062 39.8701);POLYGON ((...)) | ||
""" | ||
op.create_table( | ||
"ports", | ||
sa.Column("id", sa.Integer, primary_key=True), | ||
sa.Column("country", sa.String(255), nullable=False), | ||
sa.Column("port", sa.String(255), nullable=False), | ||
sa.Column("url", sa.String(255), nullable=False), | ||
sa.Column("locode", sa.String(255), nullable=False), | ||
sa.Column("latitude", sa.String(255), nullable=False), | ||
sa.Column("longitude", sa.String(255), nullable=False), | ||
sa.Column( | ||
"geometry_point", | ||
Geometry(geometry_type="POINT", srid=4326), | ||
nullable=False, | ||
), | ||
sa.Column( | ||
"geometry_buffer", | ||
Geometry(geometry_type="POLYGON", srid=4326), | ||
nullable=False, | ||
), | ||
) | ||
|
||
|
||
def downgrade() -> None: | ||
op.drop_table("ports") |