From c0608c70919d04b6342933ae3e3f45f99f8fb02e Mon Sep 17 00:00:00 2001 From: Ana Ordonez Date: Wed, 18 Dec 2024 14:16:37 -0800 Subject: [PATCH 1/2] Update xcdat_openxml.py --- pcmdi_metrics/io/xcdat_openxml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index d961a759f..cebe257b5 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -9,7 +9,7 @@ def xcdat_open( - infile: Union[str, list], data_var: str = None, decode_times: bool = True + infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks = None ) -> xr.Dataset: """ Open input file (netCDF, or xml generated by cdscan) From 6511fdf93731f31eeaf9599a69b6b54f248faa51 Mon Sep 17 00:00:00 2001 From: Ana Ordonez Date: Wed, 18 Dec 2024 14:36:24 -0800 Subject: [PATCH 2/2] update xcdat_open --- pcmdi_metrics/io/xcdat_openxml.py | 63 +++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/pcmdi_metrics/io/xcdat_openxml.py b/pcmdi_metrics/io/xcdat_openxml.py index cebe257b5..9e84a743c 100644 --- a/pcmdi_metrics/io/xcdat_openxml.py +++ b/pcmdi_metrics/io/xcdat_openxml.py @@ -7,9 +7,11 @@ import xcdat as xc import xmltodict +from pcmdi_metrics.io.xcdat_dataset_io import get_calendar + def xcdat_open( - infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks = None + infile: Union[str, list], data_var: str = None, decode_times: bool = True, chunks={} ) -> xr.Dataset: """ Open input file (netCDF, or xml generated by cdscan) @@ -24,6 +26,8 @@ def xcdat_open( decode_times : bool, optional If True, attempt to decode times encoded in the standard NetCDF datetime format into cftime.datetime objects. Otherwise, leave them encoded as numbers. This keyword may not be supported by all the backends, by default True. + chunks : int, "auto", dict, or None, optional + The chunk size used to load data into dask arrays. Returns ------- @@ -45,16 +49,67 @@ def xcdat_open( >>> ds = xcdat_open('mydata.xml') """ if isinstance(infile, list) or "*" in infile: - ds = xc.open_mfdataset(infile, data_var=data_var, decode_times=decode_times) + try: + ds = xc.open_mfdataset( + infile, data_var=data_var, decode_times=decode_times, chunks=chunks + ) + except ( + ValueError + ): # Could be due to non-cf-compliant calendar or other attribute + ds = xc.open_mfdataset( + infile, data_var=data_var, decode_times=False, chunks=chunks + ) + ds = fix_noncompliant_attr(ds) else: if infile.split(".")[-1].lower() == "xml": - ds = _xcdat_openxml(infile, data_var=data_var, decode_times=decode_times) + try: + ds = _xcdat_openxml( + infile, data_var=data_var, decode_times=decode_times, chunks=chunks + ) + except ( + ValueError + ): # Could be due to non-cf-compliant calendar or other attribute + ds = _xcdat_openxml( + infile, data_var=data_var, decode_times=False, chunks=chunks + ) + ds = fix_noncompliant_attr(ds) else: - ds = xc.open_dataset(infile, data_var=data_var, decode_times=decode_times) + try: + ds = xc.open_dataset( + infile, data_var=data_var, decode_times=decode_times, chunks=chunks + ) + except ( + ValueError + ): # Could be due to non-cf-compliant calendar or other attribute + ds = xc.open_dataset( + infile, data_var=data_var, decode_times=False, chunks=chunks + ) + ds = fix_noncompliant_attr(ds) return ds.bounds.add_missing_bounds() +def fix_noncompliant_attr(ds: xr.Dataset) -> xr.Dataset: + """Fix dataset attributes that do not meet cf standards + + Parameters + ---------- + ds: xr.Dataset + xarray dataset to fix + + Returns + ------- + xr.Dataset + xarray dataset with updated attributes + """ + # Add any calendar fixes here + cal = get_calendar(ds) + cal = cal.replace("-", "_") + ds.time.attrs["calendar"] = cal + ds = xc.decode_time(ds) + return ds + + def _xcdat_openxml( xmlfile: str, data_var: str = None, decode_times: bool = True ) -> xr.Dataset: