From 9d6febc5670c799a299868d018bee7a43202fe57 Mon Sep 17 00:00:00 2001 From: Bob Torgerson Date: Fri, 29 Sep 2023 11:51:39 -0800 Subject: [PATCH] Historical and projected 2km temperature data ingests. --- iem/tas_historical_2km/combine.py | 47 ++++++ iem/tas_historical_2km/ingest.json | 220 ++++++++++++++++++++++++ iem/tas_historical_2km/luts.py | 132 +++++++++++++++ iem/tas_historical_2km/merge.py | 75 +++++++++ iem/tas_projected_2km/ingest.json | 23 +-- iem/tas_projected_2km/luts_ingest.json | 221 ------------------------- iem/tas_projected_2km/merge.py | 6 +- 7 files changed, 485 insertions(+), 239 deletions(-) create mode 100644 iem/tas_historical_2km/combine.py create mode 100644 iem/tas_historical_2km/ingest.json create mode 100644 iem/tas_historical_2km/luts.py create mode 100644 iem/tas_historical_2km/merge.py delete mode 100644 iem/tas_projected_2km/luts_ingest.json diff --git a/iem/tas_historical_2km/combine.py b/iem/tas_historical_2km/combine.py new file mode 100644 index 0000000..61cabe8 --- /dev/null +++ b/iem/tas_historical_2km/combine.py @@ -0,0 +1,47 @@ +import xarray as xr +import glob +import os + +# Define the list of possible models and months +months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] + +# Directory where the NetCDF files are located +data_dir = "/usr/local/data/torgerso/historicaltemperature/tas" + + +for month in months: + # Initialize empty lists for each variable + tas_list = [] + tasmax_list = [] + tasmin_list = [] + + # Find NetCDF files matching the naming convention in the specified directory + tas_file = os.path.join(data_dir, f"tas_{month}_temperature.nc") + tasmax_file = os.path.join(data_dir, f"tasmax_{month}_temperature.nc") + tasmin_file = os.path.join(data_dir, f"tasmin_{month}_temperature.nc") + + # Open and append variables to respective lists if files exist + if os.path.exists(tas_file): + tas_list.append(xr.open_dataset(tas_file)["tas"]) + if os.path.exists(tasmax_file): + tasmax_list.append(xr.open_dataset(tasmax_file)["tasmax"]) + if os.path.exists(tasmin_file): + tasmin_list.append(xr.open_dataset(tasmin_file)["tasmin"]) + + # Concatenate variables along the 'year' dimension in chunks + combined_tas = xr.concat(tas_list, dim="year") + combined_tasmax = xr.concat(tasmax_list, dim="year") + combined_tasmin = xr.concat(tasmin_list, dim="year") + + # Create a new dataset with the modified variables + combined_dataset = xr.Dataset( + {"tas": combined_tas, "tasmax": combined_tasmax, "tasmin": combined_tasmin} + ) + + # Save the combined dataset to a new NetCDF file in the specified directory + output_filename = os.path.join(data_dir, f"combined_{month}_temperature.nc") + combined_dataset.to_netcdf(output_filename, format="NETCDF4", mode="w") + + print(f"Combined and saved {output_filename}") + +print("All files combined successfully.") diff --git a/iem/tas_historical_2km/ingest.json b/iem/tas_historical_2km/ingest.json new file mode 100644 index 0000000..bd88937 --- /dev/null +++ b/iem/tas_historical_2km/ingest.json @@ -0,0 +1,220 @@ +{ + "config": { + "service_url": "http://localhost:8080/rasdaman/ows", + "tmp_directory": "/tmp/", + "crs_resolver": "http://localhost:8080/def/", + "default_crs": "http://localhost:8080/def/crs/EPSG/0/3338", + "default_null_values": [ + "-9999" + ], + "mock": false, + "automated": true + }, + "input": { + "coverage_id": "tas_2km_historical", + "paths": [ + "/usr/local/data/torgerso/historicaltemperature/tas/attempt/*.nc" + ] + }, + "recipe": { + "name": "general_coverage", + "options": { + "wms_import": true, + "import_order": "ascending", + "coverage": { + "crs": "OGC/0/Index1D?axis-label=\"month\"@OGC/0/Index1D?axis-label=\"year\"@EPSG/0/3338", + "metadata": { + "type": "xml", + "global": { + "Title": "'Historical 2km Temperature'", + "Encoding": { + "tas": "C", + "tasmax": "C", + "tasmin": "C", + "month": { + "0": "01", + "1": "02", + "2": "03", + "3": "04", + "4": "05", + "5": "06", + "6": "07", + "7": "08", + "8": "09", + "9": "10", + "10": "11", + "11": "12" + }, + "year": { + "0": "1901", + "1": "1902", + "2": "1903", + "3": "1904", + "4": "1905", + "5": "1906", + "6": "1907", + "7": "1908", + "8": "1909", + "9": "1910", + "10": "1911", + "11": "1912", + "12": "1913", + "13": "1914", + "14": "1915", + "15": "1916", + "16": "1917", + "17": "1918", + "18": "1919", + "19": "1920", + "20": "1921", + "21": "1922", + "22": "1923", + "23": "1924", + "24": "1925", + "25": "1926", + "26": "1927", + "27": "1928", + "28": "1929", + "29": "1930", + "30": "1931", + "31": "1932", + "32": "1933", + "33": "1934", + "34": "1935", + "35": "1936", + "36": "1937", + "37": "1938", + "38": "1939", + "39": "1940", + "40": "1941", + "41": "1942", + "42": "1943", + "43": "1944", + "44": "1945", + "45": "1946", + "46": "1947", + "47": "1948", + "48": "1949", + "49": "1950", + "50": "1951", + "51": "1952", + "52": "1953", + "53": "1954", + "54": "1955", + "55": "1956", + "56": "1957", + "57": "1958", + "58": "1959", + "59": "1960", + "60": "1961", + "61": "1962", + "62": "1963", + "63": "1964", + "64": "1965", + "65": "1966", + "66": "1967", + "67": "1968", + "68": "1969", + "69": "1970", + "70": "1971", + "71": "1972", + "72": "1973", + "73": "1974", + "74": "1975", + "75": "1976", + "76": "1977", + "77": "1978", + "78": "1979", + "79": "1980", + "80": "1981", + "81": "1982", + "82": "1983", + "83": "1984", + "84": "1985", + "85": "1986", + "86": "1987", + "87": "1988", + "88": "1989", + "89": "1990", + "90": "1991", + "91": "1992", + "92": "1993", + "93": "1994", + "94": "1995", + "95": "1996", + "96": "1997", + "97": "1998", + "98": "1999", + "99": "2000", + "100": "2001", + "101": "2002", + "102": "2003", + "103": "2004", + "104": "2005", + "105": "2006", + "106": "2007", + "107": "2008", + "108": "2009", + "109": "2010", + "110": "2011", + "111": "2012", + "112": "2013", + "113": "2014", + "114": "2015" + } + } + } + }, + "slicer": { + "type": "netcdf", + "pixelIsPoint": true, + "bands": [ + { + "name": "tas", + "identifier": "tas", + "nilValue": "-9999.0" + }, + { + "name": "tasmax", + "identifier": "tasmax", + "nilValue": "-9999.0" + }, + { + "name": "tasmin", + "identifier": "tasmin", + "nilValue": "-9999.0" + } + ], + "axes": { + "month": { + "statements": "import imp, os; luts = imp.load_source('luts', os.getenv('LUTS_PATH')); regex_str = 'combined_(01|02|03|04|05|06|07|08|09|10|11|12)_temperature.nc'", + "min": "luts.months[regex_extract('${file:name}', regex_str, 1)]", + "irregular": true, + "dataBound": false, + "gridOrder": 0 + }, + "year": { + "min": "luts.years['1901']", + "max": "luts.years['2015']", + "directPositions": "[luts.years[x] for x in ${netcdf:variable:year}]", + "irregular": true, + "gridOrder": 1 + }, + "X": { + "min": "${netcdf:variable:x:min}", + "max": "${netcdf:variable:x:max}", + "resolution": "${netcdf:variable:x:resolution}", + "gridOrder": 3 + }, + "Y": { + "min": "${netcdf:variable:y:min}", + "max": "${netcdf:variable:y:max}", + "resolution": "${netcdf:variable:y:resolution}", + "gridOrder": 2 + } + } + } + } + } + } +} \ No newline at end of file diff --git a/iem/tas_historical_2km/luts.py b/iem/tas_historical_2km/luts.py new file mode 100644 index 0000000..c89ff82 --- /dev/null +++ b/iem/tas_historical_2km/luts.py @@ -0,0 +1,132 @@ +months = { + "01": 0, + "02": 1, + "03": 2, + "04": 3, + "05": 4, + "06": 5, + "07": 6, + "08": 7, + "09": 8, + "10": 9, + "11": 10, + "12": 11, +} + +years = { + "1901": 0, + "1902": 1, + "1903": 2, + "1904": 3, + "1905": 4, + "1906": 5, + "1907": 6, + "1908": 7, + "1909": 8, + "1910": 9, + "1911": 10, + "1912": 11, + "1913": 12, + "1914": 13, + "1915": 14, + "1916": 15, + "1917": 16, + "1918": 17, + "1919": 18, + "1920": 19, + "1921": 20, + "1922": 21, + "1923": 22, + "1924": 23, + "1925": 24, + "1926": 25, + "1927": 26, + "1928": 27, + "1929": 28, + "1930": 29, + "1931": 30, + "1932": 31, + "1933": 32, + "1934": 33, + "1935": 34, + "1936": 35, + "1937": 36, + "1938": 37, + "1939": 38, + "1940": 39, + "1941": 40, + "1942": 41, + "1943": 42, + "1944": 43, + "1945": 44, + "1946": 45, + "1947": 46, + "1948": 47, + "1949": 48, + "1950": 49, + "1951": 50, + "1952": 51, + "1953": 52, + "1954": 53, + "1955": 54, + "1956": 55, + "1957": 56, + "1958": 57, + "1959": 58, + "1960": 59, + "1961": 60, + "1962": 61, + "1963": 62, + "1964": 63, + "1965": 64, + "1966": 65, + "1967": 66, + "1968": 67, + "1969": 68, + "1970": 69, + "1971": 70, + "1972": 71, + "1973": 72, + "1974": 73, + "1975": 74, + "1976": 75, + "1977": 76, + "1978": 77, + "1979": 78, + "1980": 79, + "1981": 80, + "1982": 81, + "1983": 82, + "1984": 83, + "1985": 84, + "1986": 85, + "1987": 86, + "1988": 87, + "1989": 88, + "1990": 89, + "1991": 90, + "1992": 91, + "1993": 92, + "1994": 93, + "1995": 94, + "1996": 95, + "1997": 96, + "1998": 97, + "1999": 98, + "2000": 99, + "2001": 100, + "2002": 101, + "2003": 102, + "2004": 103, + "2005": 104, + "2006": 105, + "2007": 106, + "2008": 107, + "2009": 108, + "2010": 109, + "2011": 110, + "2012": 111, + "2013": 112, + "2014": 113, + "2015": 114, +} diff --git a/iem/tas_historical_2km/merge.py b/iem/tas_historical_2km/merge.py new file mode 100644 index 0000000..9b98c48 --- /dev/null +++ b/iem/tas_historical_2km/merge.py @@ -0,0 +1,75 @@ +import os +import glob +import numpy as np +import tqdm +import xarray as xr +import rasterio as rio +import re + +# Set the working directory to the new path +os.chdir("/tmp/historical-temp/temperature/tas/") + +# Create a list of all the files in the directory +files = glob.glob("*.tif") + +for var_name in ["tas", "tasmax", "tasmin"]: + for month_num in range(1, 13): + formatted_month = f"{month_num:02}" + + # Regular expression pattern to match variable names followed by "degC" or "mm" + variable_pattern = re.compile( + rf"^{var_name}_mean_C_(.*?)_(.*?)_{formatted_month}_(.*?).tif$" + ) + + # No data value + nodata_value = -9999 + + # Get the projected x and y coordinates from a single geotiff + with rio.open(files[0]) as src: + cols, rows = np.meshgrid(np.arange(src.width), np.arange(src.height)) + xarr, yarr = rio.transform.xy(src.transform, rows, cols) + xcoords = xarr[0] + ycoords = np.array([a[0] for a in yarr]) + + data_arrays = [] + for i, file in enumerate(tqdm.tqdm(files)): + # Use regular expression to extract the variable name without units + match = variable_pattern.match(file) + if match: + variable_name = var_name + + with rio.open(file) as src: + data = src.read(1, masked=True) + data = np.where(data.mask, -9999, data) + + data_array = xr.DataArray( + data=np.expand_dims(data, axis=(0)), + dims=["year", "y", "x"], + coords=dict( + year=(["year"], [match.group(3)]), + y=(["y"], ycoords), + x=(["x"], xcoords), + ), + name=variable_name, + ) + + data_arrays.append(data_array) + + # first combine by coordinates to create a dataset for each variable. I think this saves time when the combining dataarrays with different names (variables). + var_datasets = [] + varnames = np.unique([da.name for da in data_arrays]) + for name in tqdm.tqdm(varnames): + var_datasets.append( + xr.combine_by_coords([da for da in data_arrays if da.name == name]) + ) + + # then, merge the variable datasets into one single Dataset + ds = xr.merge(var_datasets) + + # Define the CRS as EPSG:4269 or NAD83 + crs_dict = {"crs": "EPSG:3338"} + + # Add the CRS as an attribute to the dataset + ds.attrs.update(crs_dict) + + ds.to_netcdf(f"{var_name}_{formatted_month}_temperature.nc") diff --git a/iem/tas_projected_2km/ingest.json b/iem/tas_projected_2km/ingest.json index 31510e6..eead1be 100644 --- a/iem/tas_projected_2km/ingest.json +++ b/iem/tas_projected_2km/ingest.json @@ -32,16 +32,13 @@ "tasmax": "C", "tasmin": "C", "model": { - "0": "GFDL-CM3", - "1": "GISS-E2-R", - "2": "IPSL-CM5A-LR", - "3": "MRI-CGCM3", - "4": "NCAR-CCSM4" + "0": "5ModelAvg", + "1": "GFDL-CM3", + "2": "NCAR-CCSM4" }, "scenario": { "0": "rcp45", - "1": "rcp60", - "2": "rcp85" + "1": "rcp85" }, "month": { "0": "01", @@ -179,11 +176,10 @@ ], "axes": { "model": { - "statements": "import imp, os; luts = imp.load_source('luts', os.getenv('LUTS_PATH'));", - "min": "luts.models['GFDL-CM3']", - "max": "luts.models['NCAR-CCSM4']", - "directPositions": "[luts.models[x] for x in ${netcdf:variable:model}]", + "statements": "import imp, os; luts = imp.load_source('luts', os.getenv('LUTS_PATH')); regex_str = 'combined_(5ModelAvg|GFDL-CM3|NCAR-CCSM4)_(01|02|03|04|05|06|07|08|09|10|11|12)_temperature.nc'", + "min": "luts.models[regex_extract('${file:name}', regex_str, 1)]", "irregular": true, + "dataBound": false, "gridOrder": 0 }, "scenario": { @@ -194,10 +190,9 @@ "gridOrder": 1 }, "month": { - "min": "luts.months['01']", - "max": "luts.months['12']", - "directPositions": "[luts.months[x] for x in ${netcdf:variable:month}]", + "min": "luts.months[regex_extract('${file:name}', regex_str, 2)]", "irregular": true, + "dataBound": false, "gridOrder": 2 }, "year": { diff --git a/iem/tas_projected_2km/luts_ingest.json b/iem/tas_projected_2km/luts_ingest.json deleted file mode 100644 index 4e38d39..0000000 --- a/iem/tas_projected_2km/luts_ingest.json +++ /dev/null @@ -1,221 +0,0 @@ -{ - "config": { - "service_url": "http://localhost:8080/rasdaman/ows", - "tmp_directory": "/tmp/", - "crs_resolver": "http://localhost:8080/def/", - "default_crs": "http://localhost:8080/def/crs/EPSG/0/3338", - "default_null_values": [ - "-9999" - ], - "mock": false, - "automated": true - }, - "input": { - "coverage_id": "tas_2km_projected", - "paths": [ - "/usr/local/data/torgerso/temperature/tas/attempt/*.nc" - ] - }, - "recipe": { - "name": "general_coverage", - "options": { - "wms_import": true, - "import_order": "ascending", - "coverage": { - "crs": "OGC/0/Index1D?axis-label=\"model\"@OGC/0/Index1D?axis-label=\"scenario\"@OGC/0/Index1D?axis-label=\"month\"@OGC/0/Index1D?axis-label=\"year\"@EPSG/0/3338", - "metadata": { - "type": "xml", - "global": { - "Title": "'Projected 2km Temperature'", - "Encoding": { - "tas": "C", - "tasmax": "C", - "tasmin": "C", - "model": { - "0": "GFDL-CM3", - "1": "GISS-E2-R", - "2": "IPSL-CM5A-LR", - "3": "MRI-CGCM3", - "4": "NCAR-CCSM4" - }, - "scenario": { - "0": "rcp45", - "1": "rcp60", - "2": "rcp85" - }, - "month": { - "0": "01", - "1": "02", - "2": "03", - "3": "04", - "4": "05", - "5": "06", - "6": "07", - "7": "08", - "8": "09", - "9": "10", - "10": "11", - "11": "12" - }, - "year": { - "0": "2006", - "1": "2007", - "2": "2008", - "3": "2009", - "4": "2010", - "5": "2011", - "6": "2012", - "7": "2013", - "8": "2014", - "9": "2015", - "10": "2016", - "11": "2017", - "12": "2018", - "13": "2019", - "14": "2020", - "15": "2021", - "16": "2022", - "17": "2023", - "18": "2024", - "19": "2025", - "20": "2026", - "21": "2027", - "22": "2028", - "23": "2029", - "24": "2030", - "25": "2031", - "26": "2032", - "27": "2033", - "28": "2034", - "29": "2035", - "30": "2036", - "31": "2037", - "32": "2038", - "33": "2039", - "34": "2040", - "35": "2041", - "36": "2042", - "37": "2043", - "38": "2044", - "39": "2045", - "40": "2046", - "41": "2047", - "42": "2048", - "43": "2049", - "44": "2050", - "45": "2051", - "46": "2052", - "47": "2053", - "48": "2054", - "49": "2055", - "50": "2056", - "51": "2057", - "52": "2058", - "53": "2059", - "54": "2060", - "55": "2061", - "56": "2062", - "57": "2063", - "58": "2064", - "59": "2065", - "60": "2066", - "61": "2067", - "62": "2068", - "63": "2069", - "64": "2070", - "65": "2071", - "66": "2072", - "67": "2073", - "68": "2074", - "69": "2075", - "70": "2076", - "71": "2077", - "72": "2078", - "73": "2079", - "74": "2080", - "75": "2081", - "76": "2082", - "77": "2083", - "78": "2084", - "79": "2085", - "80": "2086", - "81": "2087", - "82": "2088", - "83": "2089", - "84": "2090", - "85": "2091", - "86": "2092", - "87": "2093", - "88": "2094", - "89": "2095", - "90": "2096", - "91": "2097", - "92": "2098", - "93": "2099", - "94": "2100" - } - } - } - }, - "slicer": { - "type": "netcdf", - "pixelIsPoint": true, - "bands": [ - { - "name": "tas", - "identifier": "tas", - "nilValue": "-9999.0" - }, - { - "name": "tasmax", - "identifier": "tasmax", - "nilValue": "-9999.0" - }, - { - "name": "tasmin", - "identifier": "tasmin", - "nilValue": "-9999.0" - } - ], - "axes": { - "model": { - "statements": "import imp, os; luts = imp.load_source('luts', os.getenv('LUTS_PATH')); regex_str = 'combined_(GFDL-CM3|GISS-E2-R|IPSL-CM5A-LR|MRI-CGCM3|NCAR-CCSM4)_(01|02|03|04|05|06|07|08|09|10|11|12)_temperature.nc'", - "min": "luts.models[regex_extract('${file:name}', regex_str, 1)]", - "irregular": true, - "gridOrder": 0 - }, - "scenario": { - "min": "0", - "max": "2", - "irregular": true, - "gridOrder": 1 - }, - "month": { - "min": "luts.months[regex_extract('${file:name}', regex_str, 2)]", - "irregular": true, - "gridOrder": 2 - }, - "year": { - "min": "0", - "max": "94", - "irregular": true, - "gridOrder": 3 - }, - "X": { - "min": "${netcdf:variable:x:min}", - "max": "${netcdf:variable:x:max}", - "resolution": "${netcdf:variable:x:resolution}", - "gridOrder": 5 - }, - "Y": { - "min": "${netcdf:variable:y:min}", - "max": "${netcdf:variable:y:max}", - "resolution": "${netcdf:variable:y:resolution}", - "gridOrder": 4 - } - } - } - } - } - } -} \ No newline at end of file diff --git a/iem/tas_projected_2km/merge.py b/iem/tas_projected_2km/merge.py index 128b869..cc644d8 100644 --- a/iem/tas_projected_2km/merge.py +++ b/iem/tas_projected_2km/merge.py @@ -50,12 +50,10 @@ data = np.where(data.mask, -9999, data) data_array = xr.DataArray( - data=np.expand_dims(data, axis=(0, 1, 2, 3)), - dims=["model", "scenario", "month", "year", "y", "x"], + data=np.expand_dims(data, axis=(0, 1)), + dims=["scenario", "year", "y", "x"], coords=dict( - model=(["model"], [model_name]), scenario=(["scenario"], [match.group(2)]), - month=(["month"], [formatted_month]), year=(["year"], [match.group(3)]), y=(["y"], ycoords), x=(["x"], xcoords),