From 487b51db9b9da355eab34828d170df2ecda02299 Mon Sep 17 00:00:00 2001 From: Cory Martin Date: Fri, 19 May 2023 13:14:54 -0400 Subject: [PATCH] Allow for flexibility to plot multiple datasets together (#102) * start of this data driver change * Trying to see what fails * Debugging changes * More changes * Small typo * One more issue * Norms * missing norm * Attempt at new test to plot two datatypes together * Make plot better * Fix title typo * YAML lint trap * Notebook test fix 1 * change bokeh * Update bokeh version to fix CI * Increment to minor version * Changes after develop merge --- .github/workflows/eva_tests_notebook.yml | 4 +- requirements-github.txt | 2 +- setup.py | 2 +- src/eva/data/cubed_sphere_restart.py | 85 +++--- src/eva/data/data_driver.py | 58 ++++ src/eva/data/eva_interactive.py | 10 +- src/eva/data/gsi_obs_space.py | 184 ++++++------ src/eva/data/ioda_obs_space.py | 273 +++++++++--------- src/eva/data/jedi_log.py | 8 +- src/eva/data/lat_lon.py | 86 +++--- src/eva/data/mon_data_space.py | 238 ++++++++------- src/eva/eva_base.py | 20 +- .../tests/config/testCubedSphereRestart.yaml | 2 +- .../config/testGsiObsSpaceAmsuaMetop-A.yaml | 2 +- .../tests/config/testGsiObsSpaceConvT.yaml | 2 +- .../config/testIodaObsSpaceAircraft.yaml | 2 +- .../config/testIodaObsSpaceAmsuaN19.yaml | 2 +- .../config/testIodaObsSpaceIASI_Metop-A.yaml | 2 +- src/eva/tests/config/testJediLog.yaml | 22 +- .../config/testMonDataSpaceHirs4Metop-A.yaml | 2 +- src/eva/tests/config/testMonSummary.yaml | 2 +- .../tests/config/testTwoDatasetsOnePlot.yaml | 87 ++++++ 22 files changed, 609 insertions(+), 486 deletions(-) create mode 100644 src/eva/data/data_driver.py create mode 100644 src/eva/tests/config/testTwoDatasetsOnePlot.yaml diff --git a/.github/workflows/eva_tests_notebook.yml b/.github/workflows/eva_tests_notebook.yml index 2c64cf63..bbf05d75 100644 --- a/.github/workflows/eva_tests_notebook.yml +++ b/.github/workflows/eva_tests_notebook.yml @@ -13,10 +13,10 @@ jobs: 
steps: # Setup Python - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.10.10 # Update conda - name: Update conda diff --git a/requirements-github.txt b/requirements-github.txt index f5f2894d..1a4e2af7 100644 --- a/requirements-github.txt +++ b/requirements-github.txt @@ -8,4 +8,4 @@ xarray==2022.6.0 seaborn==0.12.2 hvplot==0.8.2 nbconvert==6.5.4 -bokeh==2.4.3 +bokeh==3.1.1 diff --git a/setup.py b/setup.py index 1534508e..a4ad321f 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setuptools.setup( name='eva', - version='1.3.5', + version='1.4.0', author='Community owned code', description='Evaluation and Verification of an Analysis', url='https://github.com/JCSDA-internal/eva', diff --git a/src/eva/data/cubed_sphere_restart.py b/src/eva/data/cubed_sphere_restart.py index 5bc25603..14e4bbf7 100644 --- a/src/eva/data/cubed_sphere_restart.py +++ b/src/eva/data/cubed_sphere_restart.py @@ -54,63 +54,62 @@ class CubedSphereRestart(EvaBase): # ---------------------------------------------------------------------------------------------- - def execute(self, data_collections, timing): + def execute(self, dataset_config, data_collections, timing): - for dataset in self.config.get('datasets'): - # Filenames to be read into this collection - # ----------------------------------------- - fv3_filenames = get(dataset, self.logger, 'fv3_filenames') - orog_filenames = get(dataset, self.logger, 'orog_filenames') + # Filenames to be read into this collection + # ----------------------------------------- + fv3_filenames = get(dataset_config, self.logger, 'fv3_filenames') + orog_filenames = get(dataset_config, self.logger, 'orog_filenames') - # File variable type - variable = get(dataset, self.logger, 'variable') + # File variable type + variable = get(dataset_config, self.logger, 'variable') - # File resolution - resolution = get(dataset, self.logger, 'resolution') - resolution = 
int(resolution.replace('C', '')) + # File resolution + resolution = get(dataset_config, self.logger, 'resolution') + resolution = int(resolution.replace('C', '')) - # Get missing value threshold - # --------------------------- - threshold = float(get(dataset, self.logger, 'missing_value_threshold', 1.0e30)) + # Get missing value threshold + # --------------------------- + threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30)) - # Get the groups to be read - # ------------------------- - groups = get(dataset, self.logger, 'groups') + # Get the groups to be read + # ------------------------- + groups = get(dataset_config, self.logger, 'groups') - for group in groups: + for group in groups: - # Group name and variables - group_name = get(group, self.logger, 'name') - group_vars = get(group, self.logger, 'variables', 'all') + # Group name and variables + group_name = get(group, self.logger, 'name') + group_vars = get(group, self.logger, 'variables', 'all') - # Set the collection name - collection_name = dataset['name'] + # Set the collection name + collection_name = dataset_config['name'] - var_dict = {} + var_dict = {} - # Loop through group vars to create data dictionary - for var in group_vars: - if var in ['geolon', 'geolat']: - var_dict[group_name + '::' + var] = (["lon", "lat", "tile"], - read_nc(orog_filenames, var, - resolution, self.logger)) + # Loop through group vars to create data dictionary + for var in group_vars: + if var in ['geolon', 'geolat']: + var_dict[group_name + '::' + var] = (["lon", "lat", "tile"], + read_nc(orog_filenames, var, + resolution, self.logger)) - else: - var_dict[group_name + '::' + var] = (["lon", "lat", "tile"], - read_nc(fv3_filenames, var, - resolution, self.logger)) + else: + var_dict[group_name + '::' + var] = (["lon", "lat", "tile"], + read_nc(fv3_filenames, var, + resolution, self.logger)) - # Create dataset from data dictionary - ds = xr.Dataset(var_dict) + # Create dataset_config from data 
dictionary + ds = xr.Dataset(var_dict) - # Assert that the collection contains at least one variable - if not ds.keys(): - self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - ' does not have any variables.') + # Assert that the collection contains at least one variable + if not ds.keys(): + self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' + + group_name + '\' in file ' + filename + + ' does not have any variables.') - # Add the dataset to the collections - data_collections.create_or_add_to_collection(collection_name, ds) + # Add the dataset_config to the collections + data_collections.create_or_add_to_collection(collection_name, ds) # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) diff --git a/src/eva/data/data_driver.py b/src/eva/data/data_driver.py new file mode 100644 index 00000000..bc5e0b2c --- /dev/null +++ b/src/eva/data/data_driver.py @@ -0,0 +1,58 @@ +# (C) Copyright 2021-2023 NOAA/NWS/EMC +# +# (C) Copyright 2021-2023 United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+ + +# -------------------------------------------------------------------------------------------------- + + +from eva.utilities.config import get +from eva.eva_base import EvaBase, EvaFactory + +import importlib +import os + + +# -------------------------------------------------------------------------------------------------- + + +class DataDriver(EvaBase): + + def execute(self, data_collections, timing): + + # Get list of dataset dictionaries + datasets = get(self.config['data'], self.logger, 'datasets') + + # Loop over datasets + for dataset in datasets: + + # Extract name for this diagnostic data type + try: + eva_data_class_name = dataset['type'] + except Exception as e: + msg = '\'type\' key not found. \'diagnostic_data_config\': ' \ + f'{dataset}, error: {e}' + raise KeyError(msg) + + # Create the data object + creator = EvaFactory() + timing.start('DataObjectConstructor') + eva_data_object = creator.create_eva_object(eva_data_class_name, + 'data', + dataset, + self.logger, + timing) + timing.stop('DataObjectConstructor') + + # Prepare diagnostic data + self.logger.info(f'Running execute for {eva_data_object.name}') + timing.start('DataObjectExecute') + eva_data_object.execute(dataset, data_collections, timing) + timing.stop('DataObjectExecute') + +# -------------------------------------------------------------------------------------------------- diff --git a/src/eva/data/eva_interactive.py b/src/eva/data/eva_interactive.py index 82cb772c..02bc27c0 100644 --- a/src/eva/data/eva_interactive.py +++ b/src/eva/data/eva_interactive.py @@ -33,17 +33,17 @@ def __init__(self): def load_ioda(self, filename): self.filename = filename - eva_dict = {'datasets': [{'filenames': [filename], - 'groups': [], - 'missing_value_threshold': 1.0e06, - 'name': self.collection}]} + eva_dict = {'filenames': [filename], + 'groups': [], + 'missing_value_threshold': 1.0e06, + 'name': self.collection} creator = EvaFactory() eva_data_object = 
creator.create_eva_object('IodaObsSpace', 'data', eva_dict, self.logger, self.timer) - eva_data_object.execute(self.data_collection, self.timer) + eva_data_object.execute(eva_dict, self.data_collection, self.timer) def scatter(self, x, y): x_group, x_var = x.split('::') diff --git a/src/eva/data/gsi_obs_space.py b/src/eva/data/gsi_obs_space.py index dd77bb4e..c70ac91c 100644 --- a/src/eva/data/gsi_obs_space.py +++ b/src/eva/data/gsi_obs_space.py @@ -88,7 +88,7 @@ def subset_channels(ds, channels, logger, add_channels_variable=False): def satellite_dataset(ds): """ - Builds a new dataset to reshape satellite + Builds a new dataset_config to reshape satellite data. """ nchans = ds.dims['nchans'] @@ -134,7 +134,7 @@ def satellite_dataset(ds): data = np.reshape(ds[var].data, (iters, nchans)) data_vars[var] = (('nobs', 'nchans'), data) - # create dataset + # create dataset_config new_ds = Dataset(data_vars=data_vars, coords=coords, attrs=ds.attrs) @@ -149,102 +149,98 @@ class GsiObsSpace(EvaBase): # ---------------------------------------------------------------------------------------------- - def execute(self, data_collections, timeing): + def execute(self, dataset_config, data_collections, timeing): - # Loop over the datasets - # ---------------------- - for dataset in self.config.get('datasets'): + # Get channels for radiances + # -------------------------- + channels_str_or_list = get(dataset_config, self.logger, 'channels', []) - # Get channels for radiances - # -------------------------- - channels_str_or_list = get(dataset, self.logger, 'channels', []) + # Convert channels to list + channels = [] + if channels_str_or_list is not []: + channels = parse_channel_list(channels_str_or_list, self.logger) - # Convert channels to list - channels = [] - if channels_str_or_list is not []: - channels = parse_channel_list(channels_str_or_list, self.logger) + # Filenames to be read into this collection + # ----------------------------------------- + filenames = 
get(dataset_config, self.logger, 'filenames') - # Filenames to be read into this collection - # ----------------------------------------- - filenames = get(dataset, self.logger, 'filenames') - - # File variable type - if 'satellite' in dataset: - satellite = get(dataset, self.logger, 'satellite') - sensor = get(dataset, self.logger, 'sensor') - else: - variable = get(dataset, self.logger, 'variable') - - # Get missing value threshold - # --------------------------- - threshold = float(get(dataset, self.logger, 'missing_value_threshold', 1.0e30)) - - # Get the groups to be read - # ------------------------- - groups = get(dataset, self.logger, 'groups') - - # Loop over filenames - # ------------------- - for filename in filenames: - - # Loop over groups - for group in groups: - - # Group name and variables - group_name = get(group, self.logger, 'name') - group_vars = get(group, self.logger, 'variables', 'all') - - # Set the collection name - collection_name = dataset['name'] - - ds = open_dataset(filename, mask_and_scale=False, - decode_times=False) - - # If user specifies all variables set to group list - if group_vars == 'all': - group_vars = list(ds.data_vars) - - # Reshape variables if satellite diag - if 'nchans' in ds.dims: - ds = satellite_dataset(ds) - ds = subset_channels(ds, channels, self.logger) - - # Adjust variable names if uv - if 'variable' in locals(): - if variable == 'uv': - group_vars = uv(group_vars) - - # Check that all user variables are in the dataset - if not all(v in list(ds.data_vars) for v in group_vars): - self.logger.abort('For collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - f' . 
Variables {group_vars} not all present in ' + - f'the data set variables: {list(ds.keys())}') - - # Drop data variables not in user requested variables - vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) - ds = ds.drop_vars(vars_to_remove) - - # Explicitly add the channels to the collection (we do not want to include this - # in the 'variables' list in the YAML to avoid transforms being applied to them) - if 'nchans' in ds.dims: - channels_used = ds['nchans'] - ds[group_name + '::channelNumber'] = channels_used - - # Rename variables with group - rename_dict = {} - for group_var in group_vars: - rename_dict[group_var] = group_name + '::' + group_var - ds = ds.rename(rename_dict) - - # Assert that the collection contains at least one variable - if not ds.keys(): - self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - ' does not have any variables.') - - # Add the dataset to the collections - data_collections.create_or_add_to_collection(collection_name, ds, 'nobs') + # File variable type + if 'satellite' in dataset_config: + satellite = get(dataset_config, self.logger, 'satellite') + sensor = get(dataset_config, self.logger, 'sensor') + else: + variable = get(dataset_config, self.logger, 'variable') + + # Get missing value threshold + # --------------------------- + threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30)) + + # Get the groups to be read + # ------------------------- + groups = get(dataset_config, self.logger, 'groups') + + # Loop over filenames + # ------------------- + for filename in filenames: + + # Loop over groups + for group in groups: + + # Group name and variables + group_name = get(group, self.logger, 'name') + group_vars = get(group, self.logger, 'variables', 'all') + + # Set the collection name + collection_name = dataset_config['name'] + + ds = open_dataset(filename, mask_and_scale=False, + decode_times=False) + + # If user specifies all 
variables set to group list + if group_vars == 'all': + group_vars = list(ds.data_vars) + + # Reshape variables if satellite diag + if 'nchans' in ds.dims: + ds = satellite_dataset(ds) + ds = subset_channels(ds, channels, self.logger) + + # Adjust variable names if uv + if 'variable' in locals(): + if variable == 'uv': + group_vars = uv(group_vars) + + # Check that all user variables are in the dataset_config + if not all(v in list(ds.data_vars) for v in group_vars): + self.logger.abort('For collection \'' + dataset_config['name'] + + '\', group \'' + group_name + '\' in file ' + filename + + f' . Variables {group_vars} not all present in ' + + f'the data set variables: {list(ds.keys())}') + + # Drop data variables not in user requested variables + vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) + ds = ds.drop_vars(vars_to_remove) + + # Explicitly add the channels to the collection (we do not want to include this + # in the 'variables' list in the YAML to avoid transforms being applied to them) + if 'nchans' in ds.dims: + channels_used = ds['nchans'] + ds[group_name + '::channelNumber'] = channels_used + + # Rename variables with group + rename_dict = {} + for group_var in group_vars: + rename_dict[group_var] = group_name + '::' + group_var + ds = ds.rename(rename_dict) + + # Assert that the collection contains at least one variable + if not ds.keys(): + self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' + + group_name + '\' in file ' + filename + + ' does not have any variables.') + + # Add the dataset_config to the collections + data_collections.create_or_add_to_collection(collection_name, ds, 'nobs') # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) diff --git a/src/eva/data/ioda_obs_space.py b/src/eva/data/ioda_obs_space.py index c253dc34..e59f4cf2 100644 --- a/src/eva/data/ioda_obs_space.py +++ b/src/eva/data/ioda_obs_space.py @@ -48,144 +48,141 @@ class IodaObsSpace(EvaBase): # 
---------------------------------------------------------------------------------------------- - def execute(self, data_collections, timing): - - # Loop over the datasets - # ---------------------- - for dataset in self.config.get('datasets'): - - # Get channels for radiances - # -------------------------- - channels_str_or_list = get(dataset, self.logger, 'channels', []) - - # Convert channels to list - channels = [] - if channels_str_or_list is not []: - channels = parse_channel_list(channels_str_or_list, self.logger) - - # Filenames to be read into this collection - # ----------------------------------------- - filenames = get(dataset, self.logger, 'filenames') - - # Get missing value threshold - # --------------------------- - threshold = float(get(dataset, self.logger, 'missing_value_threshold', 1.0e30)) - - # Get the groups to be read - # ------------------------- - groups = get(dataset, self.logger, 'groups') - - # Loop over filenames - # ------------------- - total_loc = 0 - for filename in filenames: - # Assert that file exists - if not os.path.exists(filename): - logger.abort(f'In IodaObsSpace file \'{filename}\' does not exist') - - # Get file header - ds_header = open_dataset(filename) - - # Fix location in case ioda did not set it - locations_this_file = range(total_loc, total_loc + ds_header['Location'].size) - ds_header = ds_header.assign_coords({"Location": locations_this_file}) - total_loc = total_loc + ds_header['Location'].size - - if 'Cluster' in ds_header.keys(): - clusters_this_file = range(0, ds_header['Cluster'].size) - ds_header = ds_header.assign_coords({"Cluster": clusters_this_file}) - - # Read header part of the file to get coordinates - ds_groups = Dataset() - - # Save sensor_channels for later - add_channels = False - if 'Channel' in ds_header.keys(): - sensor_channels = ds_header['Channel'] - add_channels = True - - # Merge in the header and close - ds_groups = ds_groups.merge(ds_header) - ds_header.close() - - # Set the channels 
based on user selection and add channels variable - ds_groups = subset_channels(ds_groups, channels, True) - - # If groups is empty, read in file to retrieve group list - groups_present = True - if not groups: - groups_present = False - nc_ds = nc.Dataset(filename) - groups = list(nc_ds.groups.keys()) - nc_ds.close() - - # Loop over groups - for group in groups: - - # Group name and variables - if groups_present: - group_name = get(group, self.logger, 'name') - group_vars = get(group, self.logger, 'variables', 'all') - else: - group_name = group - group_vars = 'all' - - # Set the collection name - collection_name = dataset['name'] - - # Read the group - timing.start(f'IodaObsSpace: open_dataset {os.path.basename(filename)}') - ds = open_dataset(filename, group=group_name, mask_and_scale=False, - decode_times=False) - timing.stop(f'IodaObsSpace: open_dataset {os.path.basename(filename)}') - - # If user specifies all variables set to group list - if group_vars == 'all': - group_vars = list(ds.data_vars) - - # Check that all user variables are in the dataset - if not all(v in list(ds.data_vars) for v in group_vars): - self.logger.abort('For collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - f' . 
Variables {group_vars} not all present in ' + - f'the data set variables: {list(ds.keys())}') - - # Drop data variables not in user requested variables - vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) - ds = ds.drop_vars(vars_to_remove) - - # Rename variables with group - rename_dict = {} - for group_var in group_vars: - rename_dict[group_var] = group_name + '::' + group_var - ds = ds.rename(rename_dict) - - # Reset channel numbers from header and copy channel numbers - # into MetaData for easier use - if add_channels: - ds['Channel'] = sensor_channels - # Explicitly add the channels to the collection (we do not want to - # include this in the 'variables' list in the YAML to avoid transforms - # being applied to them) - ds['MetaData::channelNumber'] = sensor_channels - - # Set channels - ds = subset_channels(ds, channels) - - # Assert that the collection contains at least one variable - if not ds.keys(): - self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - ' does not have any variables.') - - # Merge with other groups - ds_groups = ds_groups.merge(ds) - - # Close dataset - ds.close() - - # Add the dataset to the collections - data_collections.create_or_add_to_collection(collection_name, ds_groups, 'Location') + def execute(self, dataset_config, data_collections, timing): + + # Get channels for radiances + # -------------------------- + channels_str_or_list = get(dataset_config, self.logger, 'channels', []) + + # Convert channels to list + channels = [] + if channels_str_or_list is not []: + channels = parse_channel_list(channels_str_or_list, self.logger) + + # Filenames to be read into this collection + # ----------------------------------------- + filenames = get(dataset_config, self.logger, 'filenames') + + # Get missing value threshold + # --------------------------- + threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30)) + + # Get the groups to be 
read + # ------------------------- + groups = get(dataset_config, self.logger, 'groups') + + # Loop over filenames + # ------------------- + total_loc = 0 + for filename in filenames: + # Assert that file exists + if not os.path.exists(filename): + self.logger.abort(f'In IodaObsSpace file \'{filename}\' does not exist') + + # Get file header + ds_header = open_dataset(filename) + + # Fix location in case ioda did not set it + locations_this_file = range(total_loc, total_loc + ds_header['Location'].size) + ds_header = ds_header.assign_coords({"Location": locations_this_file}) + total_loc = total_loc + ds_header['Location'].size + + if 'Cluster' in ds_header.keys(): + clusters_this_file = range(0, ds_header['Cluster'].size) + ds_header = ds_header.assign_coords({"Cluster": clusters_this_file}) + + # Read header part of the file to get coordinates + ds_groups = Dataset() + + # Save sensor_channels for later + add_channels = False + if 'Channel' in ds_header.keys(): + sensor_channels = ds_header['Channel'] + add_channels = True + + # Merge in the header and close + ds_groups = ds_groups.merge(ds_header) + ds_header.close() + + # Set the channels based on user selection and add channels variable + ds_groups = subset_channels(ds_groups, channels, True) + + # If groups is empty, read in file to retrieve group list + groups_present = True + if not groups: + groups_present = False + nc_ds = nc.Dataset(filename) + groups = list(nc_ds.groups.keys()) + nc_ds.close() + + # Loop over groups + for group in groups: + + # Group name and variables + if groups_present: + group_name = get(group, self.logger, 'name') + group_vars = get(group, self.logger, 'variables', 'all') + else: + group_name = group + group_vars = 'all' + + # Set the collection name + collection_name = dataset_config['name'] + + # Read the group + timing.start(f'IodaObsSpace: open_dataset {os.path.basename(filename)}') + ds = open_dataset(filename, group=group_name, mask_and_scale=False, + decode_times=False) + 
timing.stop(f'IodaObsSpace: open_dataset {os.path.basename(filename)}') + + # If user specifies all variables set to group list + if group_vars == 'all': + group_vars = list(ds.data_vars) + + # Check that all user variables are in the dataset_config + if not all(v in list(ds.data_vars) for v in group_vars): + self.logger.abort('For collection \'' + dataset_config['name'] + + '\', group \'' + + group_name + '\' in file ' + filename + + f' . Variables {group_vars} not all present in ' + + f'the data set variables: {list(ds.keys())}') + + # Drop data variables not in user requested variables + vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) + ds = ds.drop_vars(vars_to_remove) + + # Rename variables with group + rename_dict = {} + for group_var in group_vars: + rename_dict[group_var] = group_name + '::' + group_var + ds = ds.rename(rename_dict) + + # Reset channel numbers from header and copy channel numbers + # into MetaData for easier use + if add_channels: + ds['Channel'] = sensor_channels + # Explicitly add the channels to the collection (we do not want to + # include this in the 'variables' list in the YAML to avoid transforms + # being applied to them) + ds['MetaData::channelNumber'] = sensor_channels + + # Set channels + ds = subset_channels(ds, channels) + + # Assert that the collection contains at least one variable + if not ds.keys(): + self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' + + group_name + '\' in file ' + filename + + ' does not have any variables.') + + # Merge with other groups + ds_groups = ds_groups.merge(ds) + + # Close dataset_config + ds.close() + + # Add the dataset_config to the collections + data_collections.create_or_add_to_collection(collection_name, ds_groups, 'Location') # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) diff --git a/src/eva/data/jedi_log.py b/src/eva/data/jedi_log.py index 46604e6a..a8fa25fe 100644 --- a/src/eva/data/jedi_log.py +++ 
b/src/eva/data/jedi_log.py @@ -40,13 +40,13 @@ class JediLog(EvaBase): # ---------------------------------------------------------------------------------------------- - def execute(self, data_collections, timing): + def execute(self, dataset_config, data_collections, timing): # Get name of the log file to parse - jedi_log_to_parse = self.config.get('jedi_log_to_parse') + jedi_log_to_parse = dataset_config.get('jedi_log_to_parse') # Collection name to use - collection_name = self.config.get('collection_name') + collection_name = dataset_config.get('collection_name') # Read log file into a string with open(jedi_log_to_parse) as jedi_log_to_parse_open: @@ -86,7 +86,7 @@ def execute(self, data_collections, timing): self.log_chunks.append('\n'.join(chunk)) # Get list of things to parse from the dictionary - data_to_parse = self.config.get('data_to_parse') + data_to_parse = dataset_config.get('data_to_parse') # Loop and add to dataset for metric in data_to_parse: diff --git a/src/eva/data/lat_lon.py b/src/eva/data/lat_lon.py index 1474da15..f14aa6fb 100644 --- a/src/eva/data/lat_lon.py +++ b/src/eva/data/lat_lon.py @@ -10,51 +10,47 @@ class LatLon(EvaBase): # ---------------------------------------------------------------------------------------------- - def execute(self, data_collections): - - # Loop over the datasets - # ---------------------- - for dataset in self.config.get('datasets'): - - # Filename to be read into this collection - filename = get(dataset, self.logger, 'filename') - # get list of variables - variables = get(dataset, self.logger, 'variables') - # Set the collection name - collection_name = dataset['name'] - # get 'group' name - group = get(dataset, self.logger, 'group') - - if group not in valid_groups: - self.logger.abort('For collection \'' + dataset['name'] + '\'' + - f' group \'{group}\' is not a valid group type for LatLon.' 
+ - f' The valid types are {valid_groups}') - - # open the input netCDF file - ds = xr.open_dataset(filename) - - # Drop data variables not in user requested variables - vars_to_remove = list(set(list(ds.keys())) - set(variables)) - ds = ds.drop_vars(vars_to_remove) - - # for lat and lon, need to do a meshgrid so that each point has a lat and a lon - - # rename variables in dataset - rename_dict = {} - for v in variables: - rename_dict[v] = f'{group}::{v}' - ds = ds.rename(rename_dict) - - # Assert that the collection contains at least one variable - if not ds.keys(): - self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' + - group + '\' in file ' + filename + - ' does not have any variables.') - - # add the dataset to the collections - data_collections.create_or_add_to_collection(collection_name, ds) - - ds.close() + def execute(self, dataset_config, data_collections, timing=None): + + # Filename to be read into this collection + filename = get(dataset_config, self.logger, 'filename') + # get list of variables + variables = get(dataset_config, self.logger, 'variables') + # Set the collection name + collection_name = dataset_config['name'] + # get 'group' name + group = get(dataset_config, self.logger, 'group') + + if group not in valid_groups: + self.logger.abort('For collection \'' + dataset_config['name'] + '\'' + + f' group \'{group}\' is not a valid group type for LatLon.' 
+ + f' The valid types are {valid_groups}') + + # open the input netCDF file + ds = xr.open_dataset(filename) + + # Drop data variables not in user requested variables + vars_to_remove = list(set(list(ds.keys())) - set(variables)) + ds = ds.drop_vars(vars_to_remove) + + # for lat and lon, need to do a meshgrid so that each point has a lat and a lon + + # rename variables in dataset_config + rename_dict = {} + for v in variables: + rename_dict[v] = f'{group}::{v}' + ds = ds.rename(rename_dict) + + # Assert that the collection contains at least one variable + if not ds.keys(): + self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' + + group + '\' in file ' + filename + + ' does not have any variables.') + + # add the dataset_config to the collections + data_collections.create_or_add_to_collection(collection_name, ds) + + ds.close() # Display the contents of the collections for helping the user with making plots data_collections.display_collections() diff --git a/src/eva/data/mon_data_space.py b/src/eva/data/mon_data_space.py index bb69f983..a293f1df 100644 --- a/src/eva/data/mon_data_space.py +++ b/src/eva/data/mon_data_space.py @@ -28,129 +28,125 @@ class MonDataSpace(EvaBase): # ---------------------------------------------------------------------------------------------- - def execute(self, data_collections, timing): - - # Loop over the datasets - # ---------------------- - for dataset in self.config.get('datasets'): - - # Set the collection name - # ----------------------- - collection_name = get(dataset, self.logger, 'name') - - # Get control file and parse - # -------------------------- - control_file = get(dataset, self.logger, 'control_file') - coords, dims, attribs, nvars, vars, channo, scanpo = self.get_ctl_dict(control_file[0]) - ndims_used, dims_arr = self.get_ndims_used(dims) - - # Get the groups to be read - # ------------------------- - groups = get(dataset, self.logger, 'groups') - - # Trim coordinates - # ---------------- - 
coord_dict = { - 0: ['channels', 'Channel'], - 1: ['regions', 'Region'], - 2: ['levels', 'Level'] - } - drop_coord = [False, False, False] - requested_coord = [None, None, None] + def execute(self, dataset_config, data_collections, timing): + # Set the collection name + # ----------------------- + collection_name = get(dataset_config, self.logger, 'name') + + # Get control file and parse + # -------------------------- + control_file = get(dataset_config, self.logger, 'control_file') + coords, dims, attribs, nvars, vars, channo, scanpo = self.get_ctl_dict(control_file[0]) + ndims_used, dims_arr = self.get_ndims_used(dims) + + # Get the groups to be read + # ------------------------- + groups = get(dataset_config, self.logger, 'groups') + + # Trim coordinates + # ---------------- + coord_dict = { + 0: ['channels', 'Channel'], + 1: ['regions', 'Region'], + 2: ['levels', 'Level'] + } + drop_coord = [False, False, False] + requested_coord = [None, None, None] + + for x in range(len(coord_dict)): + str_or_list = get(dataset_config, self.logger, coord_dict[x][0], abort_on_failure=False) + if str_or_list is not None: + requested_coord[x] = parse_channel_list(str(str_or_list), self.logger) + drop_coord[x] = True + + # Set coordinate ranges + # --------------------- + x_range, y_range, z_range = self.get_dim_ranges(coords, dims, channo) + + # Filenames to be read into this collection + # ----------------------------------------- + filenames = get(dataset_config, self.logger, 'filenames') + ds_list = [] + + # Get missing value threshold + # --------------------------- + threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e30)) + + for filename in filenames: + + # read data file + darr, cycle_tm = self.read_ieee(filename, coords, dims, ndims_used, + dims_arr, nvars, vars) + + # add cycle as a variable to data array + cyc_darr = self.var_to_np_array(dims, ndims_used, dims_arr, cycle_tm) + + # create dataset from file contents + timestep_ds = None + 
+ timestep_ds = self.load_dset(vars, nvars, coords, darr, dims, ndims_used, + dims_arr, x_range, y_range, z_range, cyc_darr) + + if attribs['sat']: + timestep_ds.attrs['satellite'] = attribs['sat'] + if attribs['sensor']: + timestep_ds.attrs['sensor'] = attribs['sensor'] + + # add cycle_tm dim for concat + timestep_ds['Time'] = cycle_tm.strftime("%Y%m%d%H") + + # Add this dataset to the list of ds_list + ds_list.append(timestep_ds) + + # Concatenate datasets from ds_list into a single dataset + ds = concat(ds_list, dim='Time') + + # Group name and variables + # ------------------------ + for group in groups: + group_name = get(group, self.logger, 'name') + group_vars = get(group, self.logger, 'variables', 'all') + + # Drop coordinates not in requested list + # -------------------------------------- for x in range(len(coord_dict)): - str_or_list = get(dataset, self.logger, coord_dict[x][0], abort_on_failure=False) - if str_or_list is not None: - requested_coord[x] = parse_channel_list(str(str_or_list), self.logger) - drop_coord[x] = True - - # Set coordinate ranges - # --------------------- - x_range, y_range, z_range = self.get_dim_ranges(coords, dims, channo) - - # Filenames to be read into this collection - # ----------------------------------------- - filenames = get(dataset, self.logger, 'filenames') - ds_list = [] - - # Get missing value threshold - # --------------------------- - threshold = float(get(dataset, self.logger, 'missing_value_threshold', 1.0e30)) - - for filename in filenames: - - # read data file - darr, cycle_tm = self.read_ieee(filename, coords, dims, ndims_used, - dims_arr, nvars, vars) - - # add cycle as a variable to data array - cyc_darr = self.var_to_np_array(dims, ndims_used, dims_arr, cycle_tm) - - # create dataset from file contents - timestep_ds = None - - timestep_ds = self.load_dset(vars, nvars, coords, darr, dims, ndims_used, - dims_arr, x_range, y_range, z_range, cyc_darr) - - if attribs['sat']: - timestep_ds.attrs['satellite'] = 
attribs['sat'] - if attribs['sensor']: - timestep_ds.attrs['sensor'] = attribs['sensor'] - - # add cycle_tm dim for concat - timestep_ds['Time'] = cycle_tm.strftime("%Y%m%d%H") - - # Add this dataset to the list of ds_list - ds_list.append(timestep_ds) - - # Concatenate datasets from ds_list into a single dataset - ds = concat(ds_list, dim='Time') - - # Group name and variables - # ------------------------ - for group in groups: - group_name = get(group, self.logger, 'name') - group_vars = get(group, self.logger, 'variables', 'all') - - # Drop coordinates not in requested list - # -------------------------------------- - for x in range(len(coord_dict)): - if drop_coord[x]: - ds = self.subset_coordinate(ds, coord_dict[x][1], requested_coord[x]) - - # If user specifies all variables set to group list - # ------------------------------------------------- - if group_vars == 'all': - group_vars = list(ds.data_vars) - - # Drop data variables not in user requested variables - # --------------------------------------------------- - vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) - ds = ds.drop_vars(vars_to_remove) - - # Conditionally add channel as a variable using single dimension - if 'channel' in group_vars: - ds['channel'] = (['Channel'], channo) - - # Conditionally add scan position as a variable using single dimension - if 'scan' in group_vars: - ds['scan'] = (['scan'], scanpo) - - # Rename variables with group - rename_dict = {} - for group_var in group_vars: - rename_dict[group_var] = group_name + '::' + group_var - - ds = ds.rename(rename_dict) - - # Assert that the collection contains at least one variable - if not ds.keys(): - self.logger.abort('Collection \'' + dataset['name'] + '\', group \'' + - group_name + '\' in file ' + filename + - ' does not have any variables.') - - # Add the dataset to the collections - data_collections.create_or_add_to_collection(collection_name, ds, 'cycle') + if drop_coord[x]: + ds = self.subset_coordinate(ds, 
coord_dict[x][1], requested_coord[x]) + + # If user specifies all variables set to group list + # ------------------------------------------------- + if group_vars == 'all': + group_vars = list(ds.data_vars) + + # Drop data variables not in user requested variables + # --------------------------------------------------- + vars_to_remove = list(set(list(ds.keys())) - set(group_vars)) + ds = ds.drop_vars(vars_to_remove) + + # Conditionally add channel as a variable using single dimension + if 'channel' in group_vars: + ds['channel'] = (['Channel'], channo) + + # Conditionally add scan position as a variable using single dimension + if 'scan' in group_vars: + ds['scan'] = (['scan'], scanpo) + + # Rename variables with group + rename_dict = {} + for group_var in group_vars: + rename_dict[group_var] = group_name + '::' + group_var + + ds = ds.rename(rename_dict) + + # Assert that the collection contains at least one variable + if not ds.keys(): + self.logger.abort('Collection \'' + dataset_config['name'] + '\', group \'' + + group_name + '\' in file ' + filename + + ' does not have any variables.') + + # Add the dataset to the collections + data_collections.create_or_add_to_collection(collection_name, ds, 'cycle') # Nan out unphysical values data_collections.nan_float_values_outside_threshold(threshold) diff --git a/src/eva/eva_base.py b/src/eva/eva_base.py index 99d82ebe..92756ff4 100644 --- a/src/eva/eva_base.py +++ b/src/eva/eva_base.py @@ -145,33 +145,25 @@ def eva(eva_config, eva_logger=None): msg = "diagnostic config must contain 'data' and 'graphics'" raise KeyError(msg) - # Extract name for this diagnostic data type - try: - eva_data_class_name = diagnostic_config['data']['type'] - except Exception as e: - msg = '\'type\' key not found. 
\'diagnostic_data_config\': ' \ - f'{diagnostic_data_config}, error: {e}' - raise KeyError(msg) - # Create the data collections # --------------------------- data_collections = DataCollections() # Create the data object creator = EvaFactory() - timing.start('DataObjectConstructor') - eva_data_object = creator.create_eva_object(eva_data_class_name, + timing.start('DataConstructor') + eva_data_object = creator.create_eva_object('DataDriver', 'data', - diagnostic_config['data'], + diagnostic_config, eva_logger, timing) - timing.stop('DataObjectConstructor') + timing.stop('DataConstructor') # Prepare diagnostic data logger.info(f'Running execute for {eva_data_object.name}') - timing.start('DataObjectExecute') + timing.start('DataExecute') eva_data_object.execute(data_collections, timing) - timing.stop('DataObjectExecute') + timing.stop('DataExecute') # Create the transforms if 'transforms' in diagnostic_config: diff --git a/src/eva/tests/config/testCubedSphereRestart.yaml b/src/eva/tests/config/testCubedSphereRestart.yaml index e8db39fb..4ecf782d 100644 --- a/src/eva/tests/config/testCubedSphereRestart.yaml +++ b/src/eva/tests/config/testCubedSphereRestart.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: CubedSphereRestart datasets: - name: experiment + type: CubedSphereRestart variable: T resolution: C48 fv3_filenames: diff --git a/src/eva/tests/config/testGsiObsSpaceAmsuaMetop-A.yaml b/src/eva/tests/config/testGsiObsSpaceAmsuaMetop-A.yaml index 241a9d46..2e3f6eab 100644 --- a/src/eva/tests/config/testGsiObsSpaceAmsuaMetop-A.yaml +++ b/src/eva/tests/config/testGsiObsSpaceAmsuaMetop-A.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: GsiObsSpace datasets: - name: experiment + type: GsiObsSpace satellite: metop-a sensor: amsua filenames: diff --git a/src/eva/tests/config/testGsiObsSpaceConvT.yaml b/src/eva/tests/config/testGsiObsSpaceConvT.yaml index 9c0c047a..ae13bae8 100644 --- 
a/src/eva/tests/config/testGsiObsSpaceConvT.yaml +++ b/src/eva/tests/config/testGsiObsSpaceConvT.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: GsiObsSpace datasets: - name: experiment + type: GsiObsSpace variable: t filenames: - ${data_input_path}/gsi_obs_space.conv_t_ges.2020092000.nc4 diff --git a/src/eva/tests/config/testIodaObsSpaceAircraft.yaml b/src/eva/tests/config/testIodaObsSpaceAircraft.yaml index ca3ef703..28f6299b 100644 --- a/src/eva/tests/config/testIodaObsSpaceAircraft.yaml +++ b/src/eva/tests/config/testIodaObsSpaceAircraft.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: IodaObsSpace datasets: - name: experiment + type: IodaObsSpace filenames: - ${data_input_path}/ioda_obs_space.aircraft.hofx.2020-12-14T210000Z.nc4 groups: diff --git a/src/eva/tests/config/testIodaObsSpaceAmsuaN19.yaml b/src/eva/tests/config/testIodaObsSpaceAmsuaN19.yaml index a724f4b9..757662e5 100644 --- a/src/eva/tests/config/testIodaObsSpaceAmsuaN19.yaml +++ b/src/eva/tests/config/testIodaObsSpaceAmsuaN19.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: IodaObsSpace datasets: - name: experiment + type: IodaObsSpace filenames: - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4 channels: &channels 3,8 diff --git a/src/eva/tests/config/testIodaObsSpaceIASI_Metop-A.yaml b/src/eva/tests/config/testIodaObsSpaceIASI_Metop-A.yaml index a45b3d93..672abe0b 100644 --- a/src/eva/tests/config/testIodaObsSpaceIASI_Metop-A.yaml +++ b/src/eva/tests/config/testIodaObsSpaceIASI_Metop-A.yaml @@ -1,8 +1,8 @@ diagnostics: - data: - type: IodaObsSpace datasets: - name: experiment + type: IodaObsSpace filenames: - ${data_input_path}/ioda_obs_space.iasi_metop-a.hofx.2021-08-01T000000Z.nc4 channels: [16, 29, 32, 35, 38, 41, 44, 47, 49, 50, 51, 53, 55, 56, 57, 59, 61, 62, 63, 66, 68, diff --git a/src/eva/tests/config/testJediLog.yaml b/src/eva/tests/config/testJediLog.yaml index d1358f4d..7d47d381 100644 
--- a/src/eva/tests/config/testJediLog.yaml +++ b/src/eva/tests/config/testJediLog.yaml @@ -3,11 +3,12 @@ diagnostics: # Data read # --------- - data: - type: JediLog - collection_name: jedi_log_test - jedi_log_to_parse: ${data_input_path}/jedi_log.var_rpcg.txt - data_to_parse: - convergence: true + datasets: + - type: JediLog + collection_name: jedi_log_test + jedi_log_to_parse: ${data_input_path}/jedi_log.var_rpcg.txt + data_to_parse: + convergence: true graphics: - figure: @@ -56,11 +57,12 @@ diagnostics: label: 'Normalized norm reduction' - data: - type: JediLog - collection_name: jedi_log_test - jedi_log_to_parse: ${data_input_path}/jedi_log.var_dripcg_ctest.txt - data_to_parse: - convergence: true + datasets: + - type: JediLog + collection_name: jedi_log_test + jedi_log_to_parse: ${data_input_path}/jedi_log.var_dripcg_ctest.txt + data_to_parse: + convergence: true # Make plots graphics: diff --git a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml index 2fc8b8b6..68d2d71b 100644 --- a/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml +++ b/src/eva/tests/config/testMonDataSpaceHirs4Metop-A.yaml @@ -3,9 +3,9 @@ diagnostics: # Data read # --------- - data: - type: MonDataSpace datasets: - name: experiment + type: MonDataSpace satellite: metop-a sensor: hirs4 control_file: diff --git a/src/eva/tests/config/testMonSummary.yaml b/src/eva/tests/config/testMonSummary.yaml index 9848ce6e..debd2e19 100644 --- a/src/eva/tests/config/testMonSummary.yaml +++ b/src/eva/tests/config/testMonSummary.yaml @@ -10,9 +10,9 @@ diagnostics: # Data read # --------- - data: - type: MonDataSpace datasets: - name: experiment + type: MonDataSpace satellite: metop-a sensor: hirs4 control_file: diff --git a/src/eva/tests/config/testTwoDatasetsOnePlot.yaml b/src/eva/tests/config/testTwoDatasetsOnePlot.yaml new file mode 100644 index 00000000..d05111f2 --- /dev/null +++ b/src/eva/tests/config/testTwoDatasetsOnePlot.yaml @@ 
-0,0 +1,87 @@ +diagnostics: + + # Data read + # --------- +- data: + datasets: + - name: observations + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.aircraft.hofx.2020-12-14T210000Z.nc4 + groups: + - name: ObsValue + variables: [airTemperature] + - name: MetaData + - name: forecast + type: CubedSphereRestart + variable: T + resolution: C48 + fv3_filenames: + - ${data_input_path}/20210323.150000.sfc_data.tile1.nc + - ${data_input_path}/20210323.150000.sfc_data.tile2.nc + - ${data_input_path}/20210323.150000.sfc_data.tile3.nc + - ${data_input_path}/20210323.150000.sfc_data.tile4.nc + - ${data_input_path}/20210323.150000.sfc_data.tile5.nc + - ${data_input_path}/20210323.150000.sfc_data.tile6.nc + orog_filenames: + - ${data_input_path}/C48_oro_data.tile1.nc + - ${data_input_path}/C48_oro_data.tile2.nc + - ${data_input_path}/C48_oro_data.tile3.nc + - ${data_input_path}/C48_oro_data.tile4.nc + - ${data_input_path}/C48_oro_data.tile5.nc + - ${data_input_path}/C48_oro_data.tile6.nc + groups: + - name: FV3Restart + variables: &variables [geolon, + geolat, + t2m] + + graphics: + # Map plots + # --------- + + # Observations + - batch figure: + variables: [t2m] + dynamic options: + - type: vminvmaxcmap + data variable: forecast::FV3Restart::t2m + figure: + figure size: [20,10] + layout: [1,1] + title: 'Aircraft T Observations | FV3 2m Temperature' + output name: map_plots/multi/fv3_t2m_aircraft_t.png + plots: + - mapping: + projection: plcarr + domain: global + add_map_features: ['coastline'] + add_colorbar: + label: Temperature + add_grid: + layers: + - type: MapGridded + longitude: + variable: forecast::FV3Restart::geolon + latitude: + variable: forecast::FV3Restart::geolat + data: + variable: forecast::FV3Restart::t2m + label: 2m T + colorbar: true + cmap: jet + vmin: 250 + vmax: 320 + - type: MapScatter + longitude: + variable: observations::MetaData::longitude + latitude: + variable: observations::MetaData::latitude + data: + variable: 
observations::ObsValue::airTemperature + markersize: 2 + label: Temperature + colorbar: true + cmap: jet + vmin: 250 + vmax: 320