Commit

added depth to the script

taddyb committed Jan 22, 2025
1 parent 18c3183 commit 8561610
Showing 2 changed files with 134 additions and 96 deletions.
134 changes: 134 additions & 0 deletions scripts/swot_width_depth_to_zarr.py
@@ -0,0 +1,134 @@
import argparse
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import polars as pl
from shapely.geometry import Point
from tqdm import tqdm
import zarr

def parse_arguments():
"""Parsing the arguments for zone"""
parser = argparse.ArgumentParser(description='Process HydroSWOT river width and depth obs for a specified zone')
parser.add_argument('--zone', type=str, required=True, help='Zone identifier for MERIT data processing')
return parser.parse_args()

def merge(zone: str):
"""Merging and writing the hydroswot streamflow points to the edges zarr store
Parameters
----------
zone: str
the merit zone we're pulling obs from
Note
----
You will need to install the following pacakge for this script to work:
`uv pip install openpyxl`
"""
print("reading zarr store and excel file")
root_path = Path(f"/projects/mhpi/data/MERIT/zarr/graph/CONUS/edges/{zone}")
    if not root_path.exists():
raise FileNotFoundError("Cannot find your Zarr store")
root = zarr.open_group(root_path)

file_path = Path("/projects/mhpi/data/swot/SWOT_ADCP_Dataset.xlsx")
    if not file_path.exists():
raise FileNotFoundError("Cannot find SWOT data")
df = pd.read_excel(file_path)
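    # Build point geometries from the gauge longitude/latitude columns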
geometry = [Point(xy) for xy in zip(df["dec_long_va"], df["dec_lat_va"])]
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

print("reading river gdf and buffering lines")
# basin_shp_file = Path(f"/projects/mhpi/data/MERIT/raw/basins/cat_pfaf_{zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp")
riv_shp_file = Path(f"/projects/mhpi/data/MERIT/raw/flowlines/riv_pfaf_{zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp")
# if basin_shp_file.exists() == False:
# raise FileNotFoundError("Cannot find MERIT basin COMID data")
    if not riv_shp_file.exists():
raise FileNotFoundError("Cannot find MERIT flowlines COMID data")
# basin_gdf = gpd.read_file(filename=basin_shp_file)
riv_gdf = gpd.read_file(filename=riv_shp_file).to_crs("EPSG:5070")

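    # Buffer in a projected, meter-based CRS (EPSG:5070) so the 200 m buffer is
    # metric, then reproject back to WGS84 to match the gauge points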
riv_gdf.geometry = riv_gdf.buffer(200)
riv_gdf = riv_gdf.to_crs("EPSG:4326")

print("Running spatial join")
matched_gdf = gpd.sjoin(left_df=gdf, right_df=riv_gdf, how='inner', predicate='intersects')
geometry = [Point(xy) for xy in zip(matched_gdf["dec_long_va"], matched_gdf["dec_lat_va"])]
point_gdf = gpd.GeoDataFrame(matched_gdf, geometry=geometry, crs="EPSG:4326")

if "mean_observed_swot_width" not in root:
json_ = {
"merit_COMID": point_gdf["COMID"].values,
"drainage_area": point_gdf["drain_area_va"].values * 2.58999, # converting from mi^2 to km^2
"width": point_gdf["stream_wdth_va"].values * 0.3048, # converting from feet to meters
}
        df = pl.DataFrame(data=json_).filter(
            pl.col("width").is_not_nan() & pl.col("drainage_area").is_not_nan()
        )

drainage_area_avg = df.group_by("merit_COMID").agg(pl.col("drainage_area").mean()).sort("merit_COMID")
width_avg = df.group_by("merit_COMID").agg(pl.col("width").mean()).sort("merit_COMID")
comids = width_avg.select("merit_COMID").to_numpy().squeeze()
basins = root.merit_basin[:]
        width_np = np.full(root.id[:].shape, -1.0, dtype=np.float32)  # -1 marks edges with no accepted observation
for comid in tqdm(comids, desc="writing comids"):
            idx = np.flatnonzero(basins == comid)  # flatnonzero stays 1-D even for a single match
uparea = root.uparea[idx][-1]
measured_width = width_avg.filter(pl.col("merit_COMID") == comid).select("width").to_numpy()[0][0]
measured_da = drainage_area_avg.filter(pl.col("merit_COMID") == comid).select("drainage_area").to_numpy()[0][0]
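            # Accept the observation only if the gauge's reported drainage area
            # agrees with the MERIT upstream area: within 25% for small basins
            # (uparea < 1000), within 10% otherwise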
if uparea < 1000:
threshold = 0.25
else:
threshold = 0.1
if np.abs((measured_da - uparea) / uparea) < threshold:
width_np[idx] = measured_width

        root.create_array(name="mean_observed_swot_width", data=width_np)
else:
print("width write has already been made")

if "mean_observed_swot_depth" not in root:
json_ = {
"merit_COMID": point_gdf["COMID"].values,
"drainage_area": point_gdf["drain_area_va"].values * 2.58999, # converting from mi^2 to km^2
"mean_depth": point_gdf["mean_depth_va"].values * 0.3048, # converting from feet to meters
}
        df = pl.DataFrame(data=json_).filter(
            pl.col("mean_depth").is_not_nan() & pl.col("drainage_area").is_not_nan()
        )

drainage_area_avg = df.group_by("merit_COMID").agg(pl.col("drainage_area").mean()).sort("merit_COMID")
depth_avg = df.group_by("merit_COMID").agg(pl.col("mean_depth").mean()).sort("merit_COMID")

comids = depth_avg.select("merit_COMID").to_numpy().squeeze()
basins = root.merit_basin[:]
        depth_np = np.full(root.id[:].shape, -1.0, dtype=np.float32)  # -1 marks edges with no accepted observation
for comid in tqdm(comids, desc="writing comids"):
            idx = np.flatnonzero(basins == comid)  # flatnonzero stays 1-D even for a single match
uparea = root.uparea[idx][-1]
measured_depth = depth_avg.filter(pl.col("merit_COMID") == comid).select("mean_depth").to_numpy()[0][0]
measured_da = drainage_area_avg.filter(pl.col("merit_COMID") == comid).select("drainage_area").to_numpy()[0][0]
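            # Same drainage-area agreement check as the width branch above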
if uparea < 1000:
threshold = 0.25
else:
threshold = 0.1
if np.abs((measured_da - uparea) / uparea) < threshold:
depth_np[idx] = measured_depth

        root.create_array(name="mean_observed_swot_depth", data=depth_np)
else:
print("depth write has already been made")

print("Finished")


if __name__ == "__main__":
args = parse_arguments()
merge(args.zone)
# zone = "74"
# merge(zone)
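
For reference, a minimal sketch of invoking the script for one MERIT zone (the zone value is illustrative, echoing the commented-out example above):

    uv pip install openpyxl
    python scripts/swot_width_depth_to_zarr.py --zone 74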
96 changes: 0 additions & 96 deletions scripts/swot_width_to_zarr.py

This file was deleted.
