Skip to content

Commit

Permalink
Merge pull request #117 from c-hydro/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ltrotter authored Nov 14, 2024
2 parents 5bb993e + a90dabc commit e075d7a
Show file tree
Hide file tree
Showing 13 changed files with 342 additions and 41 deletions.
1 change: 1 addition & 0 deletions =
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Requirement already satisfied: xlrd in /home/andrea/System/libs_conda/miniconda3/envs/door/lib/python3.12/site-packages (2.0.1)
3 changes: 2 additions & 1 deletion door/data_sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
from .cds import *
from .hsaf import *
from .drops2 import *
from .clms import *
from .clms import *
from .persiann import *
4 changes: 2 additions & 2 deletions door/data_sources/chirps/chirps_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def get_last_published_ts(self, prelim = None, product = None, **kwargs) -> ts.T
response = requests.head(current_url)

# if the request is successful, the last published timestep is the current timestep
if response.status_code == 200:
if response.status_code is requests.codes.ok:
return current_timestep

# if the request is not successful, move to the previous timestep
current_timestep -= 1

Expand Down
122 changes: 88 additions & 34 deletions door/data_sources/clms/clms_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
import xarray as xr
import tempfile
from typing import Optional, Iterable
import datetime as dt

from ...utils.space import BoundingBox, crop_to_bb
from ...utils.io import download_http, handle_missing
from ...tools import timestepping as ts
from ...tools.timestepping.timestep import TimeStep
from ...tools.timestepping.fixed_num_timestep import FixedNTimeStep
from ...tools.data import Dataset
from ...base_downloaders import URLDownloader

Expand All @@ -19,21 +21,22 @@ class CLMSDownloader(URLDownloader):
clms_url = "https://globalland.vito.be/download/"

default_options = {
'layers': None,
'variables': '020',
'crop_to_bounds': True,
'ts_per_year': 36
}

available_products = {
'swi': {
'versions': ["3.1.1", "3.2.1"],
'url': clms_url + 'geotiff/soil_water_index/swi_12.5km_v3_{ts_str}daily/{timestep.start:%Y}/{timestep.start:%Y%m%d}/c_gls_SWI{ts_str}-SWI{layer}_{timestep.start:%Y%m%d}1200_GLOBE_ASCAT_V{version}.tiff',
'url': clms_url + 'geotiff/soil_water_index/swi_12.5km_v3_{ts_str}daily/{timestep.start:%Y}/{timestep.start:%Y%m%d}/c_gls_SWI{ts_str}-SWI{var}_{timestep.start:%Y%m%d}1200_GLOBE_ASCAT_V{version}.tiff',
'nodata': 255,
'scale_factor': 0.5,
'available_layers': ["001", "005", "010", "020", "040", "060", "100"],
'scale_factor': 0.5
}
}

available_variables = ["001", "005", "010", "020", "040", "060", "100"]

def __init__(self, product: str) -> None:
self.set_product(product)

Expand All @@ -44,14 +47,67 @@ def set_product(self, product: str) -> None:
self.nodata = self.available_products[self.product]["nodata"]
self.scale_factor = self.available_products[self.product]["scale_factor"]
self.versions = self.available_products[self.product]["versions"]
self.available_layers = self.available_products[self.product]["available_layers"]
self.url_blank = self.available_products[self.product]["url"]

def set_variables(self, variables: list) -> None:
    """
    Validate the requested SWI depth variables and store them on the instance.

    Parameters:
        variables: list of variable codes (e.g. "020"); matched
            case-insensitively against self.available_variables.

    Raises:
        ValueError: if a requested variable is not available, or if the
            resulting selection is empty.
    """
    self.variables = []
    for var in variables:
        this_var = var.lower()
        if this_var not in self.available_variables:
            # BUG FIX: the original built this message but never raised it,
            # so invalid variables were silently dropped.
            raise ValueError(f'Variable {var} not available. Choose one of {self.available_variables}')
        else:
            self.variables.append(this_var)
    if len(self.variables) == 0:
        raise ValueError('No valid variables selected')

def get_last_published_ts(self, **kwargs) -> ts.TimeRange:
    """
    Get the last published timestep for the dataset.

    Walks backwards from the current date, issuing an HTTP HEAD request for
    each product version, and returns the first timestep for which any
    version responds 200 OK.

    NOTE(review): despite the ts.TimeRange annotation this returns a
    timestep object — confirm against the timestepping API.
    """

    ts_per_year = self.ts_per_year

    # Set ts_str based on the ts_per_year ('10' marks the 10-daily product
    # in the CLMS URL scheme; daily products embed no marker).
    if ts_per_year == 36:
        ts_str = '10'
        # renamed local (was `TimeStep`) to avoid shadowing the imported
        # TimeStep class
        timestep_cls = FixedNTimeStep.get_subclass(ts_per_year)
    elif ts_per_year == 365:
        ts_str = ''
        timestep_cls = ts.Day
    else:
        raise ValueError(f"ts_per_year {self.ts_per_year} not supported")

    current_timestep = timestep_cls.from_date(dt.datetime.now())

    # Try every version at each timestep, stepping back in time until one
    # URL exists.
    while True:
        for v in self.versions:
            current_url_v = self.url_blank.format(
                ts_str=ts_str,
                timestep=current_timestep,
                var=self.variables[0],
                version=v
            )

            # send a HEAD request to the url (no body download)
            response = requests.head(current_url_v)

            # BUG FIX: was `is requests.codes.ok` — identity comparison on
            # ints only works by CPython small-int caching accident; use ==.
            if response.status_code == requests.codes.ok:
                return current_timestep

        # if no version is available, move to the previous timestep
        current_timestep -= 1

def get_last_published_date(self, **kwargs) -> dt.datetime:
    """Return the end datetime of the most recently published timestep."""
    last_timestep = self.get_last_published_ts(**kwargs)
    return last_timestep.end

def _get_data_ts(self,
time_range: TimeStep,
space_bounds: BoundingBox,
tmp_path: str,
layer: str,
variable: str,
missing_action: str = 'warning',
**kwargs) -> Iterable[tuple[xr.DataArray, dict]]:
''' Get the data for a specific timestep. '''
Expand All @@ -60,39 +116,45 @@ def _get_data_ts(self,
url_blank = self.url_blank.format(
ts_str=self.ts_str,
timestep=time_range,
layer=layer,
var=variable,
version="{version}"
)

# Download the file
ts_end = time_range.end
tmp_filename_raw = f'temp_{self.product}{layer}_{ts_end:%Y%m%d}.tif'
tmp_filename_raw = f'temp_{self.product}{variable}_{ts_end:%Y%m%d}.tif'
tmp_destination = os.path.join(tmp_path, tmp_filename_raw)

# try to download the file in both versions
success = False
for version in self.versions:
url = url_blank.format(version=version)
try:
download_http(url, tmp_destination)
url_v = url_blank.format(version=version)

# Crop the data
cropped = crop_to_bb(tmp_destination, space_bounds)
response = requests.head(url_v)

# Change the nodata value to np.nan and return the data
cropped = cropped.where(~np.isclose(cropped, self.nodata, equal_nan=True), np.nan)
cropped.rio.no_data = np.nan
if response.status_code is requests.codes.ok:
url = url_v
success = True
break

# Apply the scale factor
cropped *= self.scale_factor
if success:
download_http(url, tmp_destination)

yield cropped, {'layer': layer}
break
# Crop the data
cropped = crop_to_bb(tmp_destination, space_bounds)

# Change the nodata value to np.nan and return the data
cropped = cropped.where(~np.isclose(cropped, self.nodata, equal_nan=True), np.nan)
cropped.rio.no_data = np.nan

# Apply the scale factor
cropped *= self.scale_factor

except Exception as e:
continue
yield cropped, {'variable': variable}

# If the loop ends without breaking, the data is missing
handle_missing(missing_action, {'timestep': time_range, 'layer': layer})
else:
# If the loop ends without breaking, the data is missing
handle_missing(missing_action, {'timestep': time_range, 'variable': variable})

def get_data(self,
time_range: ts.TimeRange,
Expand All @@ -106,14 +168,6 @@ def get_data(self,
if options is not None:
self.set_options(options)

# Check if the layers are available
if self.layers is None:
self.layers = self.available_layers
else:
for layer in self.layers:
if layer not in self.available_layers:
raise ValueError(f'Layer {layer} not available. Choose one of {self.available_layers}')

# Set ts_str based on the ts_per_year
if self.ts_per_year == 36:
self.ts_str = '10'
Expand Down Expand Up @@ -142,8 +196,8 @@ def get_data(self,

for timestep in timesteps:
with tempfile.TemporaryDirectory() as tmp_path:
for layer in self.layers:
data_struct = self._get_data_ts(timestep, space_bounds, tmp_path, layer)
for variable in self.variables:
data_struct = self._get_data_ts(timestep, space_bounds, tmp_path, variable)
if not data_struct:
self.log.warning(f'No data found for timestep {timestep}')
continue
Expand Down
2 changes: 1 addition & 1 deletion door/data_sources/hsaf/hsaf_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def get_last_published_ts(self, product = None, **kwargs) -> ts.TimeRange:
response = requests.head(current_url)

# if the request is successful, the last published timestep is the current timestep
if response.status_code == 200:
if response.status_code is requests.codes.ok:
return current_timestep

# if the request is not successful, move to the previous timestep
Expand Down
1 change: 1 addition & 0 deletions door/data_sources/jaxa/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .gsmap_downloader import GSMAPDownloader
111 changes: 111 additions & 0 deletions door/data_sources/jaxa/gsmap_downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import os
from typing import Generator
import numpy as np
import xarray as xr
import datetime as dt
import requests
import rioxarray

from ...tools import timestepping as ts
from ...tools.timestepping.timestep import TimeStep
from ...tools.timestepping.fixed_num_timestep import FixedNTimeStep
from ...base_downloaders import URLDownloader

from ...utils.space import BoundingBox, crop_to_bb
from ...utils.io import decompress_gz

class GSMAPDownloader(URLDownloader):
    # BUG FIX: the class was named CHRSDownloader (copy-paste from the
    # persiann module, which defines its own CHRSDownloader). The package
    # __init__ does `from .gsmap_downloader import GSMAPDownloader`, which
    # would raise ImportError; renamed to match the import and the `name`
    # attribute below.
    source = "JAXA"
    name = "GSMAPDownloader"

    default_options = {
        'get_prelim': True, # if True, will also download preliminary data if available
    }

    # NOTE(review): this is an sftp URL but __init__ passes protocol='http',
    # and the product path below still references the CHRS PDIRNow product —
    # verify against the intended GSMaP endpoint.
    home = "sftp://[email protected]/realtime/hourly_G/"
    prelim_home = home + "prelim/"
    available_products: dict = {
        "PDIRNow-1hourly": {
            "ts_per_year": 8760,
            "url": home + 'PDIRNow/PDIRNow1hourly/{timestep.start:%Y}/pdirnow1h{timestep.start:%y%m%d%H}.bin.gz',
            "nodata": -9999,
            "rows_cols": (3000, 9000),
            # [lat_max, lat_min, lon_min, lon_max] — presumably degrees;
            # TODO confirm against the product grid specification.
            "lat_lon_box": [59.98, -59.98, .02, 359.98],
            "lat_lon_steps": [-0.04, 0.04],
            "scale_factor": 100,
            "dtype": '<i2' # little-endian int16
        }
    }

    def __init__(self, product: str) -> None:
        self.set_product(product)
        super().__init__(self.url_blank, protocol='http')

    def set_product(self, product: str) -> None:
        """Select a product and cache its grid/format metadata on the instance.

        Raises:
            ValueError: if the product is not in available_products.
        """
        self.product = product
        if product not in self.available_products:
            raise ValueError(f'Product {product} not available. Choose one of {self.available_products.keys()}')
        self.ts_per_year = self.available_products[product]["ts_per_year"]
        self.url_blank = self.available_products[product]["url"]
        self.nodata = self.available_products[product]["nodata"]
        self.rows_cols = self.available_products[product]["rows_cols"]
        self.lat_lon_box = self.available_products[product]["lat_lon_box"]
        self.lat_lon_steps = self.available_products[product]["lat_lon_steps"]
        self.dtype = self.available_products[product]["dtype"]
        self.scale_factor = self.available_products[product]["scale_factor"]
        # BUG FIX: format_url(prelim=True) referenced self.url_prelim_blank,
        # which was never set anywhere (AttributeError). Derive it from the
        # regular URL by switching to the preliminary-data root.
        self.url_prelim_blank = self.url_blank.replace(self.home, self.prelim_home)

    def _get_data_ts(self,
                     timestep: TimeStep,
                     space_bounds: BoundingBox,
                     tmp_path: str) -> Generator[tuple[xr.DataArray, dict], None, None]:
        """Download one timestep, decode the flat binary grid, and yield it
        cropped to the bounding box (as a georeferenced DataArray)."""

        ts_end = timestep.end
        tmp_filename_raw = f'temp_{self.product}{ts_end:%Y%m%d}'
        tmp_filename = f'{tmp_filename_raw}.bin.gz'
        tmp_destination = os.path.join(tmp_path, tmp_filename)
        print(" --> Download " + str(timestep))
        success = self.download(tmp_destination, min_size=200, missing_action='ignore', timestep=timestep)
        if success:
            # Unzip the data
            unzipped = decompress_gz(tmp_destination)

            # Decode the raw binary grid: scaled integers, nodata -> NaN
            data = np.fromfile(unzipped, dtype=np.dtype(self.dtype))
            data = data.reshape(self.rows_cols)
            data_in_mm_hr = data.astype(np.float32) / self.scale_factor
            data_in_mm_hr[data == self.nodata] = np.nan

            # Create latitude and longitude arrays (lat step is negative:
            # rows run north to south)
            lons = np.arange(self.lat_lon_box[2], self.lat_lon_box[3] + self.lat_lon_steps[1]/2, self.lat_lon_steps[1])
            lats = np.arange(self.lat_lon_box[0], self.lat_lon_box[1] + self.lat_lon_steps[0]/2, self.lat_lon_steps[0])

            # Create the xarray DataArray compliant with the rio extension
            data_array = xr.DataArray(
                data_in_mm_hr,
                dims=["y", "x"],
                coords={
                    "y": lats,
                    "x": lons
                },
                name="precipitation_rate",
                attrs={
                    "units": "mm/hr"}
            )
            # Set the CRS to WGS84 (EPSG:4326) and geotransform attributes
            data_array = data_array.rio.write_crs("EPSG:4326")
            # BUG FIX: nodata pixels were already replaced with NaN above, so
            # advertise NaN (not the raw -9999 sentinel) as the nodata value.
            data_array = data_array.rio.write_nodata(np.nan)

            # crop the data
            cropped = crop_to_bb(data_array, space_bounds)

            yield cropped, {}

    def format_url(self, prelim=False, **kwargs):
        """
        Format the url for the download; prelim=True targets the
        preliminary-data root instead of the consolidated one.
        """
        if prelim:
            url = self.url_prelim_blank.format(**kwargs)
        else:
            url = self.url_blank.format(**kwargs)
        return url

1 change: 1 addition & 0 deletions door/data_sources/persiann/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .chrs_downloader import CHRSDownloader
Loading

0 comments on commit e075d7a

Please sign in to comment.