Skip to content

Commit

Permalink
IODA Version 3 (#67)
Browse files Browse the repository at this point in the history
* use data class name in testing files

* Update tests to ioda v3

* remove rogue files

* Delete upgrade_all_obs_files.sh

* Remove rogue print

Co-authored-by: danholdaway <[email protected]>
  • Loading branch information
danholdaway and danholdaway authored Jan 24, 2023
1 parent ea2e632 commit f077607
Show file tree
Hide file tree
Showing 21 changed files with 70 additions and 51 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setuptools.setup(
name='eva',
version='1.3.1',
version='1.3.2',
author='Community owned code',
description='Evaluation and Verification of an Analysis',
url='https://github.com/JCSDA-internal/eva',
Expand Down
21 changes: 16 additions & 5 deletions src/eva/data/data_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ def create_or_add_to_collection(self, collection_name, collection, concat_dimens

# ----------------------------------------------------------------------------------------------

def adjust_channel_dimension_name(self, channel_dimension_name):
    """Rename a reader-specific channel dimension to the common name 'Channel'.

    Every collection that has ``channel_dimension_name`` among its dimensions
    gets that dimension renamed to 'Channel', and the 'Channel' index is then
    set from the variable still called ``channel_dimension_name``. Collections
    without that dimension are left untouched.

    Args:
        channel_dimension_name: Name of the channel dimension as written by
            the reader (for example 'nchans').
    """

    # Already using the common name: renaming a dimension onto itself is
    # rejected by rename_dims, so treat this as a no-op instead of failing.
    if channel_dimension_name == 'Channel':
        return

    # Only values of existing keys are replaced, so the dict size is stable
    # while iterating.
    for collection_name, dataset in self._collections.items():
        if channel_dimension_name not in dataset.dims:
            continue
        renamed = dataset.rename_dims({channel_dimension_name: 'Channel'})
        # NOTE(review): assumes a variable named channel_dimension_name exists
        # in the dataset to serve as the index - confirm against the readers.
        self._collections[collection_name] = \
            renamed.set_index({'Channel': channel_dimension_name})

# ----------------------------------------------------------------------------------------------

def add_variable_to_collection(self, collection_name, group_name, variable_name, variable):

# Assert that new variable is an xarray Dataarray
Expand Down Expand Up @@ -112,16 +123,16 @@ def get_variable_data_array(self, collection_name, group_name, variable_name, ch
if channels is None:
return data_array
elif isinstance(channels, int) or not any(not isinstance(c, int) for c in channels):
# nchans must be a dimension if it will be used for selection
if 'nchans' not in list(self._collections[collection_name].dims):
self.logger.abort('In get_variable_data_array channels is provided but nchans ' +
'is not a dimension of the Dataset')
# Channel must be a dimension if it will be used for selection
if 'Channel' not in list(self._collections[collection_name].dims):
self.logger.abort(f'In get_variable_data_array channels is provided but ' +
f'Channel is not a dimension in Dataset')
# Make sure it is a list
channels_sel = []
channels_sel.append(channels)

# Create a new DataArray with the requested channels
data_array_channels = data_array.sel(nchans=channels_sel)
data_array_channels = data_array.sel(Channel=channels_sel)
return data_array_channels

else:
Expand Down
3 changes: 3 additions & 0 deletions src/eva/data/gsi_obs_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,8 @@ def execute(self, data_collections, timeing):
# Nan out unphysical values
data_collections.nan_float_values_outside_threshold(threshold)

# Change the channel dimension name
data_collections.adjust_channel_dimension_name('nchans')

# Display the contents of the collections for helping the user with making plots
data_collections.display_collections()
42 changes: 21 additions & 21 deletions src/eva/data/ioda_obs_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,33 @@

def subset_channels(ds, channels, add_channels_variable=False):

if 'nchans' in list(ds.dims):
if 'channel' in list(ds.dims):

# Number of user requested channels
nchan_use = len(channels)
channel_use = len(channels)

# Number of channels in the file
nchan_in_file = ds.nchans.size
channel_in_file = ds.channel.size

# If user provided no channels then use all channels
if nchan_use == 0:
nchan_use = nchan_in_file
if channel_use == 0:
channel_use = channel_in_file

# Keep needed channels and reset dimension in Dataset
if nchan_use < nchan_in_file:
ds = ds.sel(nchans=channels)
if channel_use < channel_in_file:
ds = ds.sel(channel=channels)

return ds


# --------------------------------------------------------------------------------------------------


def check_nlocs(nlocs):
if max(nlocs) == 0:
new_nlocs = range(nlocs.size)
nlocs = new_nlocs + nlocs
return nlocs
def check_location(location):
    """Replace an all-zero Location index with sequential values.

    When the largest entry of ``location`` is 0, the values 0..size-1 are
    added element-wise, giving each entry a distinct index. Any other input,
    including an empty one, is returned unchanged.

    Args:
        location: Array-like exposing ``size`` and ``max()`` (for example the
            'Location' variable of an opened dataset).

    Returns:
        The original object, or a new array holding 0..size-1 added to it.
    """
    # Nothing to repair in an empty index, and a max over no elements raises.
    if location.size == 0:
        return location

    # Use the array's own reduction rather than the builtin max(), which
    # walks the values one at a time in Python.
    if location.max() == 0:
        location = range(location.size) + location
    return location


# --------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -95,17 +95,17 @@ def execute(self, data_collections, timing):
# Get file header
ds_header = open_dataset(filename)

# fix nlocs if they are all zeros
ds_header['nlocs'] = check_nlocs(ds_header['nlocs'])
# fix location if they are all zeros
ds_header['Location'] = check_location(ds_header['Location'])

# Read header part of the file to get coordinates
ds_groups = Dataset()

# Save sensor_channels for later
nchans_present = False
if 'nchans' in ds_header.keys():
sensor_channels = ds_header['nchans']
nchans_present = True
channel_present = False
if 'channel' in ds_header.keys():
sensor_channels = ds_header['channel']
channel_present = True

# Merge in the header and close
ds_groups = ds_groups.merge(ds_header)
Expand Down Expand Up @@ -152,8 +152,8 @@ def execute(self, data_collections, timing):
ds = ds.rename(rename_dict)

# Reset channel numbers from header
if nchans_present:
ds['nchans'] = sensor_channels
if channel_present:
ds['channel'] = sensor_channels

# Set channels
ds = subset_channels(ds, channels)
Expand All @@ -171,7 +171,7 @@ def execute(self, data_collections, timing):
ds.close()

# Add the dataset to the collections
data_collections.create_or_add_to_collection(collection_name, ds_groups, 'nlocs')
data_collections.create_or_add_to_collection(collection_name, ds_groups, 'Location')

# Nan out unphysical values
data_collections.nan_float_values_outside_threshold(threshold)
Expand Down
2 changes: 1 addition & 1 deletion src/eva/tests/config/testGsiObsSpaceAmsuaMetop-A.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ diagnostics:
satellite: metop-a
sensor: amsua
filenames:
- ${data_input_path}/diag_amsua_metop-a_ges.2020092200.nc4
- ${data_input_path}/gsi_obs_space.amsua_metop-a_ges.2020092200.nc4
channels: &channels 3,8
groups:
- name: GsiNcDiag
Expand Down
2 changes: 1 addition & 1 deletion src/eva/tests/config/testGsiObsSpaceConvT.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ diagnostics:
- name: experiment
variable: t
filenames:
- ${data_input_path}/diag_conv_t_ges.2020092000.nc4
- ${data_input_path}/gsi_obs_space.conv_t_ges.2020092000.nc4
groups:
- name: GsiNcDiag
variables: &variables [Obs_Minus_Forecast_adjusted,
Expand Down
4 changes: 2 additions & 2 deletions src/eva/tests/config/testIodaObsSpaceAircraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ diagnostics:
datasets:
- name: experiment
filenames:
- ${data_input_path}/aircraft.hofx.2020-12-14T210000Z.nc4
- ${data_input_path}/ioda_obs_space.aircraft.hofx.2020-12-14T210000Z.nc4
groups:
- name: ObsValue
variables: &variables [air_temperature, eastward_wind]
variables: &variables [airTemperature, windEastward]
- name: GsiHofXBc
#- name: GsiEffectiveQC
- name: hofx
Expand Down
4 changes: 2 additions & 2 deletions src/eva/tests/config/testIodaObsSpaceAmsuaN19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ diagnostics:
datasets:
- name: experiment
filenames:
- ${data_input_path}/amsua_n19.hofx.2020-12-14T210000Z.nc4
- ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T210000Z.nc4
channels: &channels 3,8
groups:
- name: ObsValue
variables: &variables [brightness_temperature]
variables: &variables [brightnessTemperature]
- name: GsiHofXBc
#- name: GsiEffectiveQC
- name: hofx
Expand Down
14 changes: 8 additions & 6 deletions src/eva/tests/config/testIodaObsSpaceIASI_Metop-A.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ diagnostics:
datasets:
- name: experiment
filenames:
- ${data_input_path}/iasi_metop-a.hofx.2021-08-01T000000Z.nc4
- ${data_input_path}/ioda_obs_space.iasi_metop-a.hofx.2021-08-01T000000Z.nc4
channels: [16, 29, 32, 35, 38, 41, 44, 47, 49, 50, 51, 53, 55, 56, 57, 59, 61, 62, 63, 66, 68,
70, 72, 74, 76, 78, 79, 81, 82, 83, 84, 85, 86, 87, 89, 92, 93, 95, 97, 99, 101, 103,
104, 106, 109, 110, 111, 113, 116, 119, 122, 125, 128, 131, 133, 135, 138, 141, 144,
Expand All @@ -19,18 +19,20 @@ diagnostics:
756, 797, 867, 906, 921, 1027, 1046, 1090, 1098, 1121, 1133, 1173]
groups:
- name: ObsValue
variables: &variables ['brightness_temperature']
variables: &variables ['brightnessTemperature']
- name: GsiHofXBc
- name: hofx
- name: EffectiveQC
- name: MetaData
variables: ['sensorChannelNumber']
- name: GsiEffectiveQC
- name: GsiFinalObsError
- name: EffectiveError
transforms:

# Stats for hofx
- transform: channel_stats
channel_dimension_name: 'Location' # Just an example; channel_stats reads 'statistic_dimension', which falls back to 'Location'
statistic list: ['Mean', 'Count']
variable_name: experiment::hofx::${variable}
for:
Expand Down Expand Up @@ -69,18 +71,18 @@ diagnostics:
layers:
- type: Scatter
x:
variable: experiment::MetaData::sensor_channel
variable: experiment::MetaData::sensorChannelNumber
y:
variable: experiment::hofxMean::brightness_temperature
variable: experiment::hofxMean::brightnessTemperature
markersize: 5
color: 'red'
label: 'JEDI h(x) versus channels (all obs)'
do_linear_regression: False
- type: Scatter
x:
variable: experiment::MetaData::sensor_channel
variable: experiment::MetaData::sensorChannelNumber
y:
variable: experiment::hofxPassedGSIQcMean::brightness_temperature
variable: experiment::hofxPassedGSIQcMean::brightnessTemperature
markersize: 5
color: 'green'
label: 'JEDI h(x) versus channels (passed GSI QC)'
Expand Down
4 changes: 2 additions & 2 deletions src/eva/tests/config/testJediLog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ diagnostics:
- data:
type: JediLog
collection_name: jedi_log_test
jedi_log_to_parse: ${data_input_path}/jedi_var_log_rpcg.txt
jedi_log_to_parse: ${data_input_path}/jedi_log.var_rpcg.txt
data_to_parse:
convergence: true

Expand Down Expand Up @@ -58,7 +58,7 @@ diagnostics:
- data:
type: JediLog
collection_name: jedi_log_test
jedi_log_to_parse: ${data_input_path}/jedi_var_log_dripcg_ctest.txt
jedi_log_to_parse: ${data_input_path}/jedi_log.var_dripcg_ctest.txt
data_to_parse:
convergence: true

Expand Down
3 changes: 0 additions & 3 deletions src/eva/tests/data/aircraft.hofx.2020-12-14T210000Z.nc4

This file was deleted.

3 changes: 0 additions & 3 deletions src/eva/tests/data/amsua_n19.hofx.2020-12-14T210000Z.nc4

This file was deleted.

3 changes: 0 additions & 3 deletions src/eva/tests/data/iasi_metop-a.hofx.2021-08-01T000000Z.nc4

This file was deleted.

Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
File renamed without changes.
5 changes: 4 additions & 1 deletion src/eva/transforms/channel_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def channel_stats(config, data_collections):
# Parse config for the expression and new collection/group/variable naming
variable_name_template = get(config, logger, 'variable_name')

# Parse config for the channel dimension name
stat_dim = get(config, logger, 'statistic_dimension', 'Location')

# Loop over the templates
for collection in collections:
for group in groups:
Expand All @@ -57,7 +60,7 @@ def channel_stats(config, data_collections):

for stat_function in stat_functions:
function_name = getattr(exp_var_data, stat_function.lower())
result = function_name(dim='nlocs')
result = function_name(dim=stat_dim)
# Add the new field to the data collections
cgv = split_collectiongroupvariable(logger, variable_name)
data_collections.add_variable_to_collection(cgv[0], cgv[1]+stat_function,
Expand Down

0 comments on commit f077607

Please sign in to comment.