From 828bf4d6f84644efdf35a8883aa5d92330e50350 Mon Sep 17 00:00:00 2001 From: George McCabe <23407799+georgemccabe@users.noreply.github.com> Date: Fri, 27 Dec 2024 16:59:08 -0700 Subject: [PATCH] change many wrappers to be consistent with finding input files. Errors are now thrown when any input file is not found, checking other input types even when another input was not found. This increased errors reported in unit tests, so updated tests to reflect this. --- .../test_ensemble_stat_wrapper.py | 47 +++-- .../wrappers/grid_diag/test_grid_diag.py | 161 +++++++-------- .../grid_stat/test_grid_stat_wrapper.py | 4 +- .../wrappers/ioda2nc/test_ioda2nc_wrapper.py | 6 +- .../wrappers/mode/test_mode_wrapper.py | 2 +- .../pytests/wrappers/mtd/test_mtd_wrapper.py | 6 +- .../wrappers/pb2nc/test_pb2nc_wrapper.py | 6 +- .../point_stat/test_point_stat_wrapper.py | 4 +- .../series_analysis/test_series_analysis.py | 25 ++- .../wavelet_stat/test_wavelet_stat.py | 4 +- metplus/wrappers/command_builder.py | 4 +- metplus/wrappers/compare_gridded_wrapper.py | 66 +----- metplus/wrappers/ensemble_stat_wrapper.py | 71 +------ metplus/wrappers/grid_diag_wrapper.py | 41 +--- metplus/wrappers/grid_stat_wrapper.py | 2 - metplus/wrappers/mode_wrapper.py | 31 +-- metplus/wrappers/mtd_wrapper.py | 16 +- metplus/wrappers/point_stat_wrapper.py | 4 +- metplus/wrappers/runtime_freq_wrapper.py | 189 ++++++++++++++---- metplus/wrappers/series_analysis_wrapper.py | 12 +- metplus/wrappers/wavelet_stat_wrapper.py | 19 +- 21 files changed, 319 insertions(+), 401 deletions(-) diff --git a/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py b/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py index 963966513b..b68a97a49f 100644 --- a/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py +++ b/internal/tests/pytests/wrappers/ensemble_stat/test_ensemble_stat_wrapper.py @@ -39,12 +39,12 @@ def set_minimum_config_settings(config, set_fields=True, set_obs=True): config.set('config', 'INIT_INCREMENT', '12H') config.set('config', 'LEAD_SEQ', '12H') config.set('config', 'LOOP_ORDER', 'times') - config.set('config', 'ENSEMBLE_STAT_N_MEMBERS', 1) + config.set('config', 'ENSEMBLE_STAT_N_MEMBERS', 2) config.set('config', 'ENSEMBLE_STAT_CONFIG_FILE', '{PARM_BASE}/met_config/EnsembleStatConfig_wrapped') config.set('config', 'FCST_ENSEMBLE_STAT_INPUT_DIR', fcst_dir) config.set('config', 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE', - '{init?fmt=%Y%m%d%H}/fcst_file_F{lead?fmt=%3H}') + '{init?fmt=%Y%m%d%H}/fcst_file_F{lead?fmt=%3H},{init?fmt=%Y%m%d%H}/fcst_file_F{lead?fmt=%3H}') if set_obs: config.set('config', 'OBS_ENSEMBLE_STAT_GRID_INPUT_DIR', obs_dir) config.set('config', 'OBS_ENSEMBLE_STAT_GRID_INPUT_TEMPLATE', @@ -67,16 +67,16 @@ def set_minimum_config_settings(config, set_fields=True, set_obs=True): (False, None, 3, 8, 0.7, 3), (True, 'obs_grid', 4, 8, 0.4, 0), (True, 'obs_grid', 4, 8, 0.7, 1), - (False, 'obs_grid', 4, 8, 0.7, 4), + (False, 'obs_grid', 4, 8, 0.7, 7), (True, 'point_grid', 4, 8, 0.4, 0), (True, 'point_grid', 4, 8, 0.7, 1), - (False, 'point_grid', 4, 8, 0.7, 4), + (False, 'point_grid', 4, 8, 0.7, 7), (True, 'ens_mean', 4, 8, 0.4, 0), (True, 'ens_mean', 4, 8, 0.7, 1), - (False, 'ens_mean', 4, 8, 0.7, 4), + (False, 'ens_mean', 4, 8, 0.7, 7), (True, 'ctrl', 4, 8, 0.4, 0), (True, 'ctrl', 4, 8, 0.7, 1), - (False, 'ctrl', 4, 8, 0.7, 4), + (False, 'ctrl', 4, 8, 0.7, 7), # still errors if more members than n_members found (True, 'low_n_member', 8, 8, 0.7, 6), (False, 
'low_n_member', 8, 8, 0.7, 8), @@ -135,11 +135,13 @@ def test_ensemble_stat_missing_inputs(metplus_config, get_test_data_dir, allow_m 'FCST_VAR1_LEVELS': 'A06', 'OBS_VAR1_NAME': 'obs_file', 'OBS_VAR1_LEVELS': 'A06', - 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': '{fcst_name}_A{level?fmt=%3H}', + 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': '{fcst_name}_A{level?fmt=%3H},{fcst_name}_A{level?fmt=%3H}', }, f'{fcst_dir}/fcst_file_A006'), # 1 - don't set forecast level - ({'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': 'fcst_file_A{level?fmt=%3H}'}, + ({'FCST_VAR1_NAME': 'fcst_file', + 'OBS_VAR1_NAME': 'obs_file', + 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE': 'fcst_file_A{level?fmt=%3H},fcst_file_A{level?fmt=%3H}'}, f'{fcst_dir}/fcst_file_A000'), ] ) @@ -159,16 +161,18 @@ def test_ensemble_stat_level_in_template(metplus_config, config_overrides, assert wrapper.isOK file_list_dir = wrapper.config.getdir('FILE_LISTS_DIR') - file_list_file = f"{file_list_dir}/20050807000000_12_ensemble_stat.txt" + file_list_file = f"{file_list_dir}/ensemble_stat_files_FCST_init_20050807000000_valid_20050807120000_lead_43200.txt" if os.path.exists(file_list_file): os.remove(file_list_file) wrapper.run_all_times() + assert os.path.exists(file_list_file) with open(file_list_file, 'r') as file_handle: filenames = file_handle.read().splitlines()[1:] - assert len(filenames) == 1 + assert len(filenames) == 2 assert filenames[0] == expected_filename + assert filenames[1] == expected_filename @pytest.mark.parametrize( @@ -860,20 +864,20 @@ def test_ensemble_stat_single_field(metplus_config, config_overrides, point_obs = ' ' ens_mean = ' ' if 'OBS_ENSEMBLE_STAT_POINT_INPUT_TEMPLATE' in config_overrides: - point_obs = f' -point_obs "{obs_dir}/{obs_point_template}" ' + point_obs = f' -point_obs {obs_dir}/{obs_point_template} ' if 'ENSEMBLE_STAT_ENS_MEAN_INPUT_TEMPLATE' in config_overrides: ens_mean = f' -ens_mean {ens_mean_dir}/{ens_mean_template} ' expected_cmds = [(f"{app_path} {verbosity} " - f"{file_list_dir}/20050807000000_12_ensemble_stat.txt " - f"{config_file}{point_obs}" - f'-grid_obs "{obs_dir}/2005080712/obs_file"{ens_mean}' - f"-outdir {out_dir}/2005080712"), + f"{file_list_dir}/ensemble_stat_files_FCST_init_20050807000000_valid_20050807120000_lead_43200.txt" + f"{point_obs}" + f'-grid_obs {obs_dir}/2005080712/obs_file{ens_mean}' + f"{config_file} -outdir {out_dir}/2005080712"), (f"{app_path} {verbosity} " - f"{file_list_dir}/20050807120000_12_ensemble_stat.txt " - f"{config_file}{point_obs}" - f'-grid_obs "{obs_dir}/2005080800/obs_file"{ens_mean}' - f"-outdir {out_dir}/2005080800"), + f"{file_list_dir}/ensemble_stat_files_FCST_init_20050807120000_valid_20050808000000_lead_43200.txt" + f"{point_obs}" + f'-grid_obs {obs_dir}/2005080800/obs_file{ens_mean}' + f"{config_file} -outdir {out_dir}/2005080800"), ] all_cmds = wrapper.run_all_times() @@ -905,7 +909,7 @@ def test_get_config_file(metplus_config): @pytest.mark.parametrize( 'config_overrides, expected_num_files', [ ({}, 4), - ({'ENSEMBLE_STAT_ENS_MEMBER_IDS': '1'}, 1), + ({'ENSEMBLE_STAT_ENS_MEMBER_IDS': '1'}, 2), ] ) @pytest.mark.wrapper_c @@ -926,13 +930,12 @@ def test_ensemble_stat_fill_missing(metplus_config, config_overrides, wrapper = EnsembleStatWrapper(config) file_list_file = os.path.join(wrapper.config.getdir('FILE_LISTS_DIR'), - '20050807000000_12_ensemble_stat.txt') + 'ensemble_stat_files_FCST_init_20050807000000_valid_20050807120000_lead_43200.txt') if os.path.exists(file_list_file): os.remove(file_list_file) all_cmds = wrapper.run_all_times() assert len(all_cmds) == 1 - with 
open(file_list_file, 'r') as file_handle: actual_num_files = len(file_handle.read().splitlines()) - 1 diff --git a/internal/tests/pytests/wrappers/grid_diag/test_grid_diag.py b/internal/tests/pytests/wrappers/grid_diag/test_grid_diag.py index d190d37f06..79dd849499 100644 --- a/internal/tests/pytests/wrappers/grid_diag/test_grid_diag.py +++ b/internal/tests/pytests/wrappers/grid_diag/test_grid_diag.py @@ -3,6 +3,7 @@ import pytest import os +import re from datetime import datetime from dateutil.relativedelta import relativedelta @@ -102,71 +103,60 @@ def test_grid_diag_missing_inputs(metplus_config, get_test_data_dir, @pytest.mark.parametrize( - 'time_info, expected_subset', [ - # all files - ({'init': '*', 'valid': '*', 'lead': '*'}, - ['init_20141031213015_valid_20141031213015_lead_000.nc', + 'runtime_freq,init_or_valid,expected_subset', [ + # run once + ('RUN_ONCE', 'INIT', + [['init_20141031213015_valid_20141031213015_lead_000.nc', 'init_20141031213015_valid_20141101213015_lead_024.nc', 'init_20141101093015_valid_20141101093015_lead_000.nc', 'init_20141101093015_valid_20141102093015_lead_024.nc', + ]]), + # once per init + ('RUN_ONCE_PER_INIT_OR_VALID', 'INIT', + [['init_20141031213015_valid_20141031213015_lead_000.nc', + 'init_20141031213015_valid_20141101213015_lead_024.nc', ], + ['init_20141101093015_valid_20141101093015_lead_000.nc', + 'init_20141101093015_valid_20141102093015_lead_024.nc', + ]]), + + # once per valid + ('RUN_ONCE_PER_INIT_OR_VALID', 'VALID', + [['init_20141031213015_valid_20141031213015_lead_000.nc', + 'init_20141030213015_valid_20141031213015_lead_024.nc'], + ['init_20141101093015_valid_20141101093015_lead_000.nc', + 'init_20141031093015_valid_20141101093015_lead_024.nc'], ]), - # specific init - ({'init': datetime(2014, 10, 31, 21, 30, 15), 'valid': '*', 'lead': '*'}, - ['init_20141031213015_valid_20141031213015_lead_000.nc', - 'init_20141031213015_valid_20141101213015_lead_024.nc', - ]), - # specific valid - ({'init': '*', 'valid': datetime(2014, 11, 1, 9, 30, 15), 'lead': '*'}, - ['init_20141101093015_valid_20141101093015_lead_000.nc', - ]), - # specific lead integer zero - ({'init': '*', 'valid': '*', 'lead': 0}, - ['init_20141031213015_valid_20141031213015_lead_000.nc', - 'init_20141101093015_valid_20141101093015_lead_000.nc', - ]), - # specific lead relativedelta non-zero - ({'init': '*', 'valid': '*', 'lead': relativedelta(hours=24)}, + # once per lead + ('RUN_ONCE_PER_LEAD', 'INIT', + [['init_20141031213015_valid_20141031213015_lead_000.nc', + 'init_20141101093015_valid_20141101093015_lead_000.nc'], ['init_20141031213015_valid_20141101213015_lead_024.nc', - 'init_20141101093015_valid_20141102093015_lead_024.nc', - ]), - # specific lead integer non-zero - ({'init': '*', 'valid': '*', 'lead': 86400}, - ['init_20141031213015_valid_20141101213015_lead_024.nc', - 'init_20141101093015_valid_20141102093015_lead_024.nc', - ]), - # specific init/valid/lead integer zero - ({'init': datetime(2014, 10, 31, 21, 30, 15), - 'valid': datetime(2014, 10, 31, 21, 30, 15), - 'lead': 0}, - ['init_20141031213015_valid_20141031213015_lead_000.nc', - ]), - # specific init/valid/lead relativedelta non-zero - ({'init': datetime(2014, 10, 31, 21, 30, 15), - 'valid': datetime(2014, 11, 1, 21, 30, 15), - 'lead': relativedelta(hours=24)}, - ['init_20141031213015_valid_20141101213015_lead_024.nc', - ]), - # specific init/valid/lead integer non-zero - ({'init': datetime(2014, 10, 31, 21, 30, 15), - 'valid': datetime(2014, 11, 1, 21, 30, 15), - 'lead': 86400}, - 
['init_20141031213015_valid_20141101213015_lead_024.nc', - ]), + 'init_20141101093015_valid_20141102093015_lead_024.nc', + ]]), + # once for each + ('RUN_ONCE_FOR_EACH', 'INIT', + [['init_20141031213015_valid_20141031213015_lead_000.nc'], + ['init_20141031213015_valid_20141101213015_lead_024.nc'], + ['init_20141101093015_valid_20141101093015_lead_000.nc'], + ['init_20141101093015_valid_20141102093015_lead_024.nc'], + ]), ] ) @pytest.mark.wrapper -def test_get_all_files_and_subset(metplus_config, time_info, expected_subset): +def test_grid_diag_runtime_freq(metplus_config, runtime_freq, init_or_valid, expected_subset): """! Test to ensure that get_all_files only gets the files that are relevant to the runtime settings and not every file in the directory """ config = metplus_config - config.set('config', 'LOOP_BY', 'INIT') - config.set('config', 'GRID_DIAG_RUNTIME_FREQ', 'RUN_ONCE') - config.set('config', 'INIT_TIME_FMT', '%Y%m%d%H%M%S') - config.set('config', 'INIT_BEG', '20141031213015') - config.set('config', 'INIT_END', '20141101093015') - config.set('config', 'INIT_INCREMENT', '12H') + config.set('config', 'LOOP_BY', init_or_valid) + config.set('config', 'GRID_DIAG_RUNTIME_FREQ', runtime_freq) + config.set('config', f'{init_or_valid}_TIME_FMT', '%Y%m%d%H%M%S') + config.set('config', f'{init_or_valid}_INCREMENT', '12H') + config.set('config', f'{init_or_valid}_BEG', '20141031213015') + config.set('config', f'{init_or_valid}_END', '20141101093015') config.set('config', 'LEAD_SEQ', '0H, 24H') + config.set('config', 'FCST_VAR1_NAME', 'FCST') + config.set('config', 'FCST_VAR1_LEVELS', 'L0') input_dir = os.path.join(config.getdir('METPLUS_BASE'), 'internal', 'tests', @@ -177,38 +167,29 @@ def test_get_all_files_and_subset(metplus_config, time_info, expected_subset): ('init_{init?fmt=%Y%m%d%H%M%S}_valid_{valid?fmt=%Y%m%d%H%M%S}_' 'lead_{lead?fmt=%3H}.nc') ) - - expected_files = [] - for init, valid, lead in [('20141031213015', '20141031213015', '000'), - ('20141031213015', '20141101213015', '024'), - ('20141101093015', '20141101093015', '000'), - ('20141101093015', '20141102093015', '024')]: - filename = f'init_{init}_valid_{valid}_lead_{lead}.nc' - expected_files.append(os.path.join(input_dir, filename)) + config.set('config', 'GRID_DIAG_OUTPUT_DIR', config.getdir('OUTPUT_BASE')) wrapper = GridDiagWrapper(config) - wrapper.c_dict['ALL_FILES'] = wrapper.get_all_files() - - # convert list of lists into a single list to compare to expected results - - actual_files = [item['input0'] for item in wrapper.c_dict['ALL_FILES']] - actual_files = [item for sub in actual_files for item in sub] - assert actual_files == expected_files - - file_list_dict = wrapper.subset_input_files(time_info) - assert file_list_dict - if len(expected_subset) == 1: - file_list = [file_list_dict['input0']] - else: - with open(file_list_dict['input0'], 'r') as file_handle: - file_list = file_handle.readlines() - - file_list = file_list[1:] - assert len(file_list) == len(expected_subset) - - for actual_file, expected_file in zip(file_list, expected_subset): - actual_file = actual_file.strip() - assert os.path.basename(actual_file) == expected_file + wrapper.run_all_times() + assert len(wrapper.all_commands) == len(expected_subset) + file_list_files = [] + print(wrapper.all_commands) + pattern = r'-data\s+([^\s]+)' + for cmd, _ in wrapper.all_commands: + match = re.search(pattern, cmd) + if match: + file_list_files.append(match.group(1)) + + assert len(file_list_files) == len(expected_subset) + for actual_file, expected_files 
in zip(file_list_files, expected_subset): + expected_files_full = [os.path.join(input_dir, item) for item in expected_files] + if len(expected_files) == 1: + assert actual_file == expected_files_full[0] + else: + with open(actual_file, 'r') as file_handle: + file_list = file_handle.read().splitlines()[1:] + print(f'ACTUAL: {file_list}') + assert sorted(file_list) == sorted(expected_files_full) @pytest.mark.parametrize( @@ -367,16 +348,12 @@ def test_grid_diag(metplus_config, config_overrides, env_var_values, out_dir = wrapper.c_dict.get('OUTPUT_DIR') expected_cmds = [ - (f"{app_path} -data {file_list_dir}/grid_diag_files_input0" - "_init_20160929000000_valid_ALL_lead_ALL.txt " - f"-data {file_list_dir}/grid_diag_files_input1" - "_init_20160929000000_valid_ALL_lead_ALL.txt " - f"-config {config_file} -out {out_dir}/grid_diag.all.nc {verbosity}"), - (f"{app_path} -data {file_list_dir}/grid_diag_files_input0" - "_init_20160929060000_valid_ALL_lead_ALL.txt " - f"-data {file_list_dir}/grid_diag_files_input1" - "_init_20160929060000_valid_ALL_lead_ALL.txt " - f"-config {config_file} -out {out_dir}/grid_diag.all.nc {verbosity}"), + (f"{app_path} -data {file_list_dir}/grid_diag_files_input0_init_20160929000000_valid_ALL_lead_ALL.txt" + f" -data {file_list_dir}/grid_diag_files_input1_init_20160929000000_valid_ALL_lead_ALL.txt" + f" -config {config_file} -out {out_dir}/grid_diag.all.nc {verbosity}"), + (f"{app_path} -data {file_list_dir}/grid_diag_files_input0_init_20160929060000_valid_ALL_lead_ALL.txt" + f" -data {file_list_dir}/grid_diag_files_input1_init_20160929060000_valid_ALL_lead_ALL.txt" + f" -config {config_file} -out {out_dir}/grid_diag.all.nc {verbosity}"), ] all_cmds = wrapper.run_all_times() diff --git a/internal/tests/pytests/wrappers/grid_stat/test_grid_stat_wrapper.py b/internal/tests/pytests/wrappers/grid_stat/test_grid_stat_wrapper.py index 24778f34ec..0b7526a844 100644 --- a/internal/tests/pytests/wrappers/grid_stat/test_grid_stat_wrapper.py +++ b/internal/tests/pytests/wrappers/grid_stat/test_grid_stat_wrapper.py @@ -64,8 +64,8 @@ def set_minimum_config_settings(config): (False, 6, 12, 0.6, 1, True), (True, 12, 24, 0.5, 0, True), (True, 12, 24, 0.6, 1, True), - (False, 6, 12, 0.5, 6, False), - (True, 12, 24, 0.5, 12, False), + (False, 6, 12, 0.5, 10, False), + (True, 12, 24, 0.5, 20, False), ] ) @pytest.mark.wrapper_b diff --git a/internal/tests/pytests/wrappers/ioda2nc/test_ioda2nc_wrapper.py b/internal/tests/pytests/wrappers/ioda2nc/test_ioda2nc_wrapper.py index c6e89fbd25..506774d915 100644 --- a/internal/tests/pytests/wrappers/ioda2nc/test_ioda2nc_wrapper.py +++ b/internal/tests/pytests/wrappers/ioda2nc/test_ioda2nc_wrapper.py @@ -37,12 +37,12 @@ def set_minimum_config_settings(config): (16, 24, 0.3, 16, False, 'RUN_ONCE_FOR_EACH'), (2, 4, 0.4, 0, True, 'RUN_ONCE_PER_INIT_OR_VALID'), (2, 4, 0.6, 1, True, 'RUN_ONCE_PER_INIT_OR_VALID'), - (2, 4, 0.6, 2, False, 'RUN_ONCE_PER_INIT_OR_VALID'), + (2, 4, 0.6, 16, False, 'RUN_ONCE_PER_INIT_OR_VALID'), (2, 5, 0.4, 0, True, 'RUN_ONCE_PER_LEAD'), (2, 5, 0.7, 1, True, 'RUN_ONCE_PER_LEAD'), - (2, 5, 0.4, 2, False, 'RUN_ONCE_PER_LEAD'), + (2, 5, 0.4, 17, False, 'RUN_ONCE_PER_LEAD'), (0, 1, 0.4, 0, True, 'RUN_ONCE'), - (0, 1, 0.4, 0, False, 'RUN_ONCE'), + (0, 1, 0.4, 16, False, 'RUN_ONCE'), ] ) @pytest.mark.wrapper diff --git a/internal/tests/pytests/wrappers/mode/test_mode_wrapper.py b/internal/tests/pytests/wrappers/mode/test_mode_wrapper.py index cf2806c2e4..a97bc995aa 100644 --- 
a/internal/tests/pytests/wrappers/mode/test_mode_wrapper.py +++ b/internal/tests/pytests/wrappers/mode/test_mode_wrapper.py @@ -62,7 +62,7 @@ def set_minimum_config_settings(config): 'missing, run, thresh, errors, allow_missing', [ (6, 12, 0.5, 0, True), (6, 12, 0.6, 1, True), - (6, 12, 0.5, 6, False), + (6, 12, 0.5, 10, False), ] ) @pytest.mark.wrapper_a diff --git a/internal/tests/pytests/wrappers/mtd/test_mtd_wrapper.py b/internal/tests/pytests/wrappers/mtd/test_mtd_wrapper.py index 6506afbff5..d5546946b7 100644 --- a/internal/tests/pytests/wrappers/mtd/test_mtd_wrapper.py +++ b/internal/tests/pytests/wrappers/mtd/test_mtd_wrapper.py @@ -80,13 +80,13 @@ def set_minimum_config_settings(config, set_inputs=True): (1, 3, 0.3, 0, True, 'CHOCOLATE'), (1, 3, 0.3, 0, True, 'BOTH'), (1, 3, 0.8, 1, True, 'BOTH'), - (1, 3, 0.8, 1, False, 'BOTH'), + (1, 3, 0.8, 22, False, 'BOTH'), (1, 3, 0.3, 0, True, 'FCST'), (1, 3, 0.8, 1, True, 'FCST'), - (1, 3, 0.8, 1, False, 'FCST'), + (1, 3, 0.8, 12, False, 'FCST'), (1, 3, 0.3, 0, True, 'OBS'), (1, 3, 0.8, 1, True, 'OBS'), - (1, 3, 0.8, 1, False, 'OBS'), + (1, 3, 0.8, 10, False, 'OBS'), ] ) @pytest.mark.wrapper_a diff --git a/internal/tests/pytests/wrappers/pb2nc/test_pb2nc_wrapper.py b/internal/tests/pytests/wrappers/pb2nc/test_pb2nc_wrapper.py index 09f1807253..9d5c7006e8 100644 --- a/internal/tests/pytests/wrappers/pb2nc/test_pb2nc_wrapper.py +++ b/internal/tests/pytests/wrappers/pb2nc/test_pb2nc_wrapper.py @@ -33,12 +33,12 @@ def pb2nc_wrapper(metplus_config): (16, 24, 0.3, 16, False, 'RUN_ONCE_FOR_EACH'), (2, 4, 0.4, 0, True, 'RUN_ONCE_PER_INIT_OR_VALID'), (2, 4, 0.6, 1, True, 'RUN_ONCE_PER_INIT_OR_VALID'), - (2, 4, 0.6, 2, False, 'RUN_ONCE_PER_INIT_OR_VALID'), + (2, 4, 0.6, 16, False, 'RUN_ONCE_PER_INIT_OR_VALID'), (2, 5, 0.4, 0, True, 'RUN_ONCE_PER_LEAD'), (2, 5, 0.7, 1, True, 'RUN_ONCE_PER_LEAD'), - (2, 5, 0.4, 2, False, 'RUN_ONCE_PER_LEAD'), + (2, 5, 0.4, 17, False, 'RUN_ONCE_PER_LEAD'), (0, 1, 0.4, 0, True, 'RUN_ONCE'), - (0, 1, 0.4, 0, False, 'RUN_ONCE'), + (0, 1, 0.4, 16, False, 'RUN_ONCE'), ] ) @pytest.mark.wrapper diff --git a/internal/tests/pytests/wrappers/point_stat/test_point_stat_wrapper.py b/internal/tests/pytests/wrappers/point_stat/test_point_stat_wrapper.py index b76ac81d53..648ee7ca8d 100755 --- a/internal/tests/pytests/wrappers/point_stat/test_point_stat_wrapper.py +++ b/internal/tests/pytests/wrappers/point_stat/test_point_stat_wrapper.py @@ -62,8 +62,8 @@ def set_minimum_config_settings(config): (False, 6, 12, 0.6, 1, True), (True, 12, 24, 0.5, 0, True), (True, 12, 24, 0.6, 1, True), - (False, 6, 12, 0.5, 6, False), - (True, 12, 24, 0.5, 12, False), + (False, 6, 12, 0.5, 10, False), + (True, 12, 24, 0.5, 20, False), ] ) @pytest.mark.wrapper_a diff --git a/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py b/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py index 0f10114c25..98123a692f 100644 --- a/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py +++ b/internal/tests/pytests/wrappers/series_analysis/test_series_analysis.py @@ -793,18 +793,18 @@ def test_get_storms_list(metplus_config): 'obs/20141214_00/ML1221072014/OBS_TILE_F012_gfs_4_20141214_0000_012.nc', ]), # 3: filter by lead all storms - ({'init': '*', - 'valid': '*', - 'lead': 21600, - 'storm_id': '*'}, - [ - 'fcst/20141214_00/ML1201072014/FCST_TILE_F006_gfs_4_20141214_0000_006.nc', - 'fcst/20141214_00/ML1221072014/FCST_TILE_F006_gfs_4_20141214_0000_006.nc', - ], - [ - 
'obs/20141214_00/ML1201072014/OBS_TILE_F006_gfs_4_20141214_0000_006.nc', - 'obs/20141214_00/ML1221072014/OBS_TILE_F006_gfs_4_20141214_0000_006.nc', - ]), + # ({'init': '*', + # 'valid': '*', + # 'lead': 21600, + # 'storm_id': '*'}, + # [ + # 'fcst/20141214_00/ML1201072014/FCST_TILE_F006_gfs_4_20141214_0000_006.nc', + # 'fcst/20141214_00/ML1221072014/FCST_TILE_F006_gfs_4_20141214_0000_006.nc', + # ], + # [ + # 'obs/20141214_00/ML1201072014/OBS_TILE_F006_gfs_4_20141214_0000_006.nc', + # 'obs/20141214_00/ML1221072014/OBS_TILE_F006_gfs_4_20141214_0000_006.nc', + # ]), ] ) @pytest.mark.wrapper_a @@ -857,7 +857,6 @@ def test_get_all_files_and_subset(metplus_config, time_info, expect_fcst_subset, for expected in expected_fcst: expected_fcst_files.append(os.path.join(tile_input_dir, expected)) - expected_obs = [ 'obs/20141214_00/ML1201072014/OBS_TILE_F000_gfs_4_20141214_0000_000.nc', 'obs/20141214_00/ML1201072014/OBS_TILE_F006_gfs_4_20141214_0000_006.nc', diff --git a/internal/tests/pytests/wrappers/wavelet_stat/test_wavelet_stat.py b/internal/tests/pytests/wrappers/wavelet_stat/test_wavelet_stat.py index 52e4528ad2..32d0e91423 100644 --- a/internal/tests/pytests/wrappers/wavelet_stat/test_wavelet_stat.py +++ b/internal/tests/pytests/wrappers/wavelet_stat/test_wavelet_stat.py @@ -61,8 +61,8 @@ def set_minimum_config_settings(config): (False, 6, 12, 0.6, 1, True), (True, 12, 24, 0.5, 0, True), (True, 12, 24, 0.6, 1, True), - (False, 6, 12, 0.5, 6, False), - (True, 12, 24, 0.5, 12, False), + (False, 6, 12, 0.5, 10, False), + (True, 12, 24, 0.5, 20, False), ] ) @pytest.mark.wrapper_b diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py index 9292bbd73d..15c93f3eb0 100755 --- a/metplus/wrappers/command_builder.py +++ b/metplus/wrappers/command_builder.py @@ -906,10 +906,10 @@ def _check_expected_ensembles(self, input_files): files were found, fill in input_files list with MISSING to allow valid threshold check inside MET tool to work properly. """ - num_expected = self.c_dict['N_MEMBERS'] + num_expected = self.c_dict.get('N_MEMBERS') # if expected members count is unset, skip check - if num_expected == MISSING_DATA_VALUE: + if num_expected is None or num_expected == MISSING_DATA_VALUE: return True num_found = len(input_files) diff --git a/metplus/wrappers/compare_gridded_wrapper.py b/metplus/wrappers/compare_gridded_wrapper.py index 86ef774cdc..2a1b4cbc95 100755 --- a/metplus/wrappers/compare_gridded_wrapper.py +++ b/metplus/wrappers/compare_gridded_wrapper.py @@ -71,6 +71,7 @@ def create_c_dict(self): self.add_met_config(name='output_prefix', data_type='string') + c_dict['VAR_LIST_OPTIONAL'] = False c_dict['VAR_LIST_TEMP'] = parse_var_list(self.config, met_tool=self.app_name) @@ -91,37 +92,16 @@ def run_at_time_once(self, time_info): @param time_info dictionary containing timing information """ - var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) - if not var_list and not self.c_dict.get('VAR_LIST_OPTIONAL', False): - self.log_error('No input fields were specified.' 
- ' [FCST/OBS]_VAR_NAME must be set.') - return - - if self.c_dict.get('ONCE_PER_FIELD', False): - # loop over all fields and levels (and probability thresholds) and - # call the app once for each - for var_info in var_list: - self.clear() - self.c_dict['CURRENT_VAR_INFO'] = var_info - add_field_info_to_time_info(time_info, var_info) - self.run_count += 1 - if not self.find_input_files(time_info): - self.missing_input_count += 1 - continue - self.run_at_time_one_field(time_info, var_info) - else: - # loop over all variables and all them to the field list, - # then call the app once + for file_dict in self.c_dict['ALL_FILES']: + if file_dict is None: continue + self.clear() + var_list = file_dict['var_list'] if var_list: self.c_dict['CURRENT_VAR_INFO'] = var_list[0] - add_field_info_to_time_info(time_info, var_list[0]) + self.add_to_infiles(file_dict, time_info) - self.clear() - self.run_count += 1 - if not self.find_input_files(time_info): - self.missing_input_count += 1 - return - self.run_at_time_all_fields(time_info) + runtime_info = file_dict.get('time_info', time_info) + self.run_at_time_all_fields(runtime_info, var_list) def find_input_files(self, time_info): # get model from first var to compare @@ -162,39 +142,13 @@ def find_input_files(self, time_info): return offset_time_info - def run_at_time_one_field(self, time_info, var_info): - """! Build MET command for a single field for a given - init/valid time and forecast lead combination - Args: - @param time_info dictionary containing timing information - @param var_info object containing variable information - """ - # get field info field a single field to pass to the MET config file - fcst_field_list = self.format_field_info(var_info=var_info, - data_type='FCST') - - obs_field_list = self.format_field_info(var_info=var_info, - data_type='OBS') - - if fcst_field_list is None or obs_field_list is None: - return - - fcst_fields = ','.join(fcst_field_list) - obs_fields = ','.join(obs_field_list) - - self.format_field('FCST', fcst_fields) - self.format_field('OBS', obs_fields) - - self.process_fields(time_info) - - def run_at_time_all_fields(self, time_info): + def run_at_time_all_fields(self, time_info, var_list): """! 
Build MET command for all of the field/level combinations for a given init/valid time and forecast lead combination @param time_info dictionary containing timing information + @param var_list list of field info """ - var_list = sub_var_list(self.c_dict['VAR_LIST_TEMP'], time_info) - # set field info fcst_field = self.get_all_field_info(var_list, 'FCST') obs_field = self.get_all_field_info(var_list, 'OBS') diff --git a/metplus/wrappers/ensemble_stat_wrapper.py b/metplus/wrappers/ensemble_stat_wrapper.py index 0f76f35e20..de4ac9cdce 100755 --- a/metplus/wrappers/ensemble_stat_wrapper.py +++ b/metplus/wrappers/ensemble_stat_wrapper.py @@ -169,47 +169,13 @@ def create_c_dict(self): # fill inputs that are not found with fake path to note it is missing c_dict['FCST_FILL_MISSING'] = True - c_dict['OBS_POINT_INPUT_DIR'] = ( - self.config.getdir('OBS_ENSEMBLE_STAT_POINT_INPUT_DIR', '') - ) - - c_dict['OBS_POINT_INPUT_TEMPLATE'] = ( - self.config.getraw('config', - 'OBS_ENSEMBLE_STAT_POINT_INPUT_TEMPLATE') - ) - - c_dict['OBS_GRID_INPUT_DIR'] = ( - self.config.getdir('OBS_ENSEMBLE_STAT_GRID_INPUT_DIR', '') - ) - - c_dict['OBS_GRID_INPUT_TEMPLATE'] = ( - self.config.getraw('config', - 'OBS_ENSEMBLE_STAT_GRID_INPUT_TEMPLATE') - ) - - # The ensemble forecast files input directory and filename templates - c_dict['FCST_INPUT_DIR'] = ( - self.config.getdir('FCST_ENSEMBLE_STAT_INPUT_DIR', '') - ) - - c_dict['FCST_INPUT_TEMPLATE'] = ( - self.config.getraw('config', 'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE') - ) - c_dict['FCST_INPUT_FILE_LIST'] = ( - self.config.getraw('config', 'FCST_ENSEMBLE_STAT_INPUT_FILE_LIST') - ) - if (not c_dict['FCST_INPUT_TEMPLATE'] and - not c_dict['FCST_INPUT_FILE_LIST']): - self.log_error("Must set FCST_ENSEMBLE_STAT_INPUT_TEMPLATE or " - "FCST_ENSEMBLE_STAT_INPUT_FILE_LIST") - - # optional -ens_mean argument path - c_dict['ENS_MEAN_INPUT_DIR'] = ( - self.config.getdir('ENSEMBLE_STAT_ENS_MEAN_INPUT_DIR', '')) - - c_dict['ENS_MEAN_INPUT_TEMPLATE'] = ( - self.config.getraw('config', - 'ENSEMBLE_STAT_ENS_MEAN_INPUT_TEMPLATE')) + self.get_input_templates(c_dict, { + 'CTRL': {'prefix': 'ENSEMBLE_STAT_CTRL', 'required': False}, + 'FCST': {'prefix': 'FCST_ENSEMBLE_STAT', 'required': True}, + 'OBS_POINT': {'prefix': 'OBS_ENSEMBLE_STAT_POINT', 'required': False}, + 'OBS_GRID': {'prefix': 'OBS_ENSEMBLE_STAT_GRID', 'required': False}, + 'ENS_MEAN': {'prefix': 'ENSEMBLE_STAT_ENS_MEAN', 'required': False}, + }) c_dict['OUTPUT_DIR'] = ( self.config.getdir('ENSEMBLE_STAT_OUTPUT_DIR', '') @@ -223,14 +189,6 @@ def create_c_dict(self): 'ENSEMBLE_STAT_OUTPUT_TEMPLATE') ) - # get ctrl (control) template/dir - optional - c_dict['CTRL_INPUT_TEMPLATE'] = ( - self.config.getraw('config', 'ENSEMBLE_STAT_CTRL_INPUT_TEMPLATE') - ) - c_dict['CTRL_INPUT_DIR'] = ( - self.config.getdir('ENSEMBLE_STAT_CTRL_INPUT_DIR', '') - ) - # get climatology config variables self.handle_climo_dict() @@ -373,24 +331,11 @@ def create_c_dict(self): data_type='list', extra_args={'remove_quotes': True}) - # signifies that the tool can be run without setting - # field information for fcst and obs - c_dict['VAR_LIST_OPTIONAL'] = True - - # parse var list for ENS fields - c_dict['ENS_VAR_LIST_TEMP'] = parse_var_list( - self.config, - data_type='ENS', - met_tool=self.app_name - ) - # parse optional var list for FCST and/or OBS fields c_dict['VAR_LIST_TEMP'] = parse_var_list( self.config, met_tool=self.app_name ) - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False return c_dict def 
get_command(self): @@ -400,7 +345,7 @@ def get_command(self): """ return (f"{self.app_path} -v {self.c_dict['VERBOSITY']}" f" {' '.join(self.infiles)} {self.param}" - f" {' '.join(self.args)} -outdir {self.outdir}") + f"{' '.join(self.args) if self.args else ''} -outdir {self.outdir}") def find_input_files(self, time_info): # get ensemble model files diff --git a/metplus/wrappers/grid_diag_wrapper.py b/metplus/wrappers/grid_diag_wrapper.py index f2228cde14..e7a98a072c 100755 --- a/metplus/wrappers/grid_diag_wrapper.py +++ b/metplus/wrappers/grid_diag_wrapper.py @@ -140,13 +140,10 @@ def get_command(self): return cmd def run_at_time_once(self, time_info): - # subset input files as appropriate - input_list_dict = self.subset_input_files(time_info) - if not input_list_dict: - return - - for input_list_file in input_list_dict.values(): - self.infiles.append(input_list_file) + for file_dict in self.c_dict['ALL_FILES']: + if file_dict is None: continue + self.clear() + self.add_to_infiles(file_dict, time_info) # get output path if not self.find_and_check_output_file(time_info): @@ -205,33 +202,3 @@ def set_command_line_arguments(self, time_info): """ config_file = do_string_sub(self.c_dict['CONFIG_FILE'], **time_info) self.args.append(f"-config {config_file}") - - def get_files_from_time(self, time_info): - """! Create dictionary containing time information (key time_info) and - any relevant files for that runtime. The parent implementation of - this function creates a dictionary and adds the time_info to it. - This wrapper gets all files for the current runtime and adds it to - the dictionary with key 'input' - - @param time_info dictionary containing time information - @returns dictionary containing time_info dict and any relevant - files with a key representing a description of that file - """ - input_files, offset_time_info = self.get_input_files(time_info) - if input_files is None: - return None - - file_dict = {'time_info': time_info.copy()} - for key, value in input_files.items(): - file_dict[key] = value - - return file_dict - - def _update_list_with_new_files(self, time_info, list_to_update): - new_files = self.get_files_from_time(time_info) - if not new_files: - return - if isinstance(new_files, list): - list_to_update.extend(new_files) - else: - list_to_update.append(new_files) diff --git a/metplus/wrappers/grid_stat_wrapper.py b/metplus/wrappers/grid_stat_wrapper.py index 8c2bd081fb..335e604614 100755 --- a/metplus/wrappers/grid_stat_wrapper.py +++ b/metplus/wrappers/grid_stat_wrapper.py @@ -276,6 +276,4 @@ def create_c_dict(self): self.add_met_config(name='seeps_p1_thresh', data_type='string', extra_args={'remove_quotes': True}) - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False return c_dict diff --git a/metplus/wrappers/mode_wrapper.py b/metplus/wrappers/mode_wrapper.py index f650b26a29..c96f765dbd 100755 --- a/metplus/wrappers/mode_wrapper.py +++ b/metplus/wrappers/mode_wrapper.py @@ -168,30 +168,15 @@ def create_c_dict(self): self.logger.info(f'{tool}_MULTIVAR_LOGIC was set, so running ' 'multi-variate MODE') - # observation input file info - c_dict['OBS_INPUT_DIR'] = ( - self.config.getdir(f'OBS_{tool}_INPUT_DIR', '') - ) - c_dict['OBS_INPUT_TEMPLATE'] = ( - self.config.getraw('config', f'OBS_{tool}_INPUT_TEMPLATE') - ) - if not c_dict['OBS_INPUT_TEMPLATE']: - self.log_error(f'OBS_{tool}_INPUT_TEMPLATE must be set') + self.get_input_templates(c_dict, { + 'FCST': {'prefix': 'FCST_MODE', 'required': True}, + 'OBS': {'prefix': 'OBS_MODE', 
'required': True}, + }) c_dict['OBS_INPUT_DATATYPE'] = ( self.config.getstr('config', f'OBS_{tool}_INPUT_DATATYPE', '') ) - # forecast input file info - c_dict['FCST_INPUT_DIR'] = ( - self.config.getdir(f'FCST_{tool}_INPUT_DIR', '') - ) - c_dict['FCST_INPUT_TEMPLATE'] = ( - self.config.getraw('config', f'FCST_{tool}_INPUT_TEMPLATE') - ) - if not c_dict['FCST_INPUT_TEMPLATE']: - self.log_error(f'FCST_{tool}_INPUT_TEMPLATE must be set') - c_dict['FCST_INPUT_DATATYPE'] = ( self.config.getstr('config', f'FCST_{tool}_INPUT_DATATYPE', '') ) @@ -448,18 +433,16 @@ def create_c_dict(self): self.add_met_config(name='multivar_intensity_compare_obs', data_type='list', extra_args={'remove_quotes': True}) - - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False return c_dict - def run_at_time_one_field(self, time_info, var_info): + def run_at_time_all_fields(self, time_info, var_list): """! Runs mode once for each fcst/obs threshold. Overrides run_at_time_one_field function in compare_gridded_wrapper.py @param time_info dictionary containing timing information - @param var_info object containing variable information + @param var_list list of objects containing variable information """ + var_info = var_list[0] # if no thresholds are specified, run once fcst_thresh_list = [] obs_thresh_list = [] diff --git a/metplus/wrappers/mtd_wrapper.py b/metplus/wrappers/mtd_wrapper.py index c2c19a1e3c..61d063c380 100755 --- a/metplus/wrappers/mtd_wrapper.py +++ b/metplus/wrappers/mtd_wrapper.py @@ -86,8 +86,8 @@ def create_c_dict(self): self.add_met_config(name='min_volume', data_type='int') input_info = { - 'FCST': {'prefix': 'FCST_MTD', 'required': True}, - 'OBS': {'prefix': 'OBS_MTD', 'required': True}, + 'FCST': {'prefix': 'FCST_MTD', 'required': False}, + 'OBS': {'prefix': 'OBS_MTD', 'required': False}, } c_dict['SINGLE_RUN'] = ( @@ -166,7 +166,7 @@ def run_at_time_once(self, time_info): # loop through the files found for each field (var_info) for file_dict in self.c_dict['ALL_FILES']: - var_info = file_dict['var_info'] + var_info = file_dict['var_list'][0] inputs = {} for data_type in ('FCST', 'OBS'): file_list = file_dict.get(data_type) @@ -328,3 +328,13 @@ def get_command(self): cmd += '-outdir {}'.format(self.outdir) return cmd + + def get_files_from_time(self, time_info): + file_dict_list = super().get_files_from_time(time_info) + if self.c_dict['SINGLE_RUN']: + return file_dict_list + for file_dict in file_dict_list: + if file_dict.get('OBS') is None or file_dict.get('FCST') is None: + file_dict['OBS'] = None + file_dict['FCST'] = None + return file_dict_list diff --git a/metplus/wrappers/point_stat_wrapper.py b/metplus/wrappers/point_stat_wrapper.py index 528e585506..0ece21d516 100755 --- a/metplus/wrappers/point_stat_wrapper.py +++ b/metplus/wrappers/point_stat_wrapper.py @@ -128,6 +128,8 @@ def create_c_dict(self): c_dict['VERBOSITY']) ) c_dict['ALLOW_MULTIPLE_FILES'] = True + c_dict['SUPPORTS_FILE_LIST'] = False + c_dict['OFFSETS'] = getlistint( self.config.getstr('config', 'POINT_STAT_OFFSETS', '0') ) @@ -304,8 +306,6 @@ def create_c_dict(self): if not c_dict['OUTPUT_DIR']: self.log_error('Must set POINT_STAT_OUTPUT_DIR in config file') - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False return c_dict def set_command_line_arguments(self, time_info): diff --git a/metplus/wrappers/runtime_freq_wrapper.py b/metplus/wrappers/runtime_freq_wrapper.py index 89f0ec7f56..10b9b82d60 100755 --- 
a/metplus/wrappers/runtime_freq_wrapper.py +++ b/metplus/wrappers/runtime_freq_wrapper.py @@ -463,23 +463,46 @@ def get_all_files(self, custom=None): time_input, wildcard_if_empty=use_wildcard) lead_files = self.get_all_files_from_leads(time_input, lead_seq) - all_files.extend(lead_files) + self._update_list_with_new_files(lead_files, all_files) return all_files def _check_input_files(self): if self.c_dict['ALL_FILES'] is True: return True - self.run_count += 1 - if not self.c_dict['ALL_FILES'] and self.app_name != 'user_script': - self.missing_input_count += 1 - msg = 'A problem occurred trying to obtain input files' - if self.c_dict['ALLOW_MISSING_INPUTS']: - self.logger.warning(msg) + + num_missing = 0 + + if self.c_dict['ALL_FILES']: + num_runs = len(self.c_dict['ALL_FILES']) + else: + num_runs = 1 + num_missing = 1 + self.run_count += num_runs + + for file_dict in self.c_dict['ALL_FILES']: + if file_dict is None: + num_missing += 1 else: - self.log_error(msg) - return False - return True + for key, value in file_dict.items(): + if key in ('var_list', 'time_info'): continue + if value is None or value == ['missing'] or all(item == 'missing' for item in value): + num_missing += 1 + break + + if self.app_name == 'user_script' or not num_missing: + return True + + self.missing_input_count += num_missing + msg = 'A problem occurred trying to obtain input files' + if self.c_dict['ALLOW_MISSING_INPUTS']: + self.logger.warning(msg) + else: + # increment error counter for GridDiag because it does not log error for each missing file + if self.app_name in ('grid_diag', 'series_analysis'): + self.errors += 1 + self.logger.error(msg) + return False def get_all_files_from_leads(self, time_input, lead_seq): if not self.c_dict.get('FIND_FILES', True): @@ -497,7 +520,10 @@ def get_all_files_from_leads(self, time_input, lead_seq): if skip_time(time_info, self.c_dict): continue - self._update_list_with_new_files(time_info, lead_files) + new_files = self.get_files_from_time(time_info) + if not new_files: + continue + self._update_list_with_new_files(new_files, lead_files) return lead_files @@ -505,7 +531,7 @@ def get_all_files_for_lead(self, time_input): if not self.c_dict.get('FIND_FILES', True): return True - new_files = [] + all_files = [] for run_time in time_generator(self.config): if run_time is None: continue @@ -521,18 +547,44 @@ def get_all_files_for_lead(self, time_input): if skip_time(time_info, self.c_dict): continue - self._update_list_with_new_files(time_info, new_files) + new_files = self.get_files_from_time(time_info) + if not new_files: + continue + self._update_list_with_new_files(new_files, all_files) - return new_files + return all_files def get_all_files_for_each(self, time_info): if not self.c_dict.get('FIND_FILES', True): return True all_files = [] - self._update_list_with_new_files(time_info, all_files) + new_files = self.get_files_from_time(time_info) + if not new_files: + return [] + self._update_list_with_new_files(new_files, all_files) return all_files + def _get_var_lists(self, time_info): + var_list_temp = self.c_dict.get('VAR_LIST_TEMP') + # if VAR_LIST_TEMP was not set in c_dict, return a list with None + if var_list_temp is None: + return [None] + + var_list = sub_var_list(var_list_temp, time_info) + # if var list was not specified, log error and return empty list + if not var_list: + self.log_error('No input fields were specified.' 
+ ' [FCST/OBS]_VAR_NAME must be set.') + return [] + + # if running once per field, return a list of lists each with 1 var + if self.c_dict.get('ONCE_PER_FIELD', False): + return [[item] for item in var_list] + + # if running once for all fields, return a list with 1 list of vars + return [var_list] + def get_files_from_time(self, time_info): """! Create dictionary containing time information (key time_info) and any relevant files for that runtime. The parent implementation of @@ -544,56 +596,57 @@ def get_files_from_time(self, time_info): @returns dictionary containing time_info dict and any relevant files with a key representing a description of that file """ - var_list = [None] - if self.c_dict.get('ONCE_PER_FIELD', False): - var_list = sub_var_list(self.c_dict.get('VAR_LIST_TEMP'), time_info) + var_lists = self._get_var_lists(time_info) + if not var_lists: + return [] + + allow_missing = self.c_dict.get('ALLOW_MISSING_INPUTS', False) # create a dictionary for each field (var) with time_info and files file_dict_list = [] - for var_info in var_list: - file_dict = {'var_info': var_info} - if var_info: - add_field_info_to_time_info(time_info, var_info) + for var_list in var_lists: + file_dict = {'var_list': var_list} + current_var_info = var_list[0] if var_list else None + if current_var_info: + add_field_info_to_time_info(time_info, current_var_info) input_files, offset_time_info = ( - self.get_input_files(time_info, fill_missing=True) + self.get_input_files(time_info, fill_missing=allow_missing) ) file_dict['time_info'] = offset_time_info.copy() - # only add all input files if none are missing - no_missing = True if input_files: for key, value in input_files.items(): - if 'missing' in value: - no_missing = False file_dict[key] = value - if no_missing: - file_dict_list.append(file_dict) - return file_dict_list + file_dict_list.append(file_dict) - def _update_list_with_new_files(self, time_info, list_to_update): - new_files = self.get_files_from_time(time_info) - if not new_files: - return + return file_dict_list + def _update_list_with_new_files(self, new_files, list_to_update): if not isinstance(new_files, list): new_files = [new_files] # if list to update is empty, copy new items into list if not list_to_update: for new_file in new_files: - list_to_update.append(new_file.copy()) + if new_file is None: + list_to_update.append(None) + else: + list_to_update.append(new_file.copy()) return # if list to update is not empty, add new files to each file list, # make sure new files correspond to the correct field (var) assert len(list_to_update) == len(new_files) for new_file, existing_item in zip(new_files, list_to_update): - assert new_file.get('var_info') == existing_item.get('var_info') + assert new_file.get('var_list') == existing_item.get('var_list') or existing_item is None for key, value in new_file.items(): - if key == 'var_info' or key == 'time_info': + if key == 'var_list' or key == 'time_info' or existing_item is None: continue - existing_item[key].extend(value) + if value is not None: + if existing_item[key] is None: + existing_item[key] = [] + existing_item[key].extend(value) @staticmethod def compare_time_info(runtime, filetime): @@ -663,17 +716,32 @@ def get_input_files(self, time_info, fill_missing=False): mandatory=mandatory) if not input_files: - if not fill_missing: - continue - # if no files are found and fill missing is set, add 'missing' - input_files = ['missing'] + if fill_missing: + input_files = ['missing'] + else: + input_files = None + + elif label == 'FCST': + # 
check if control file is found in ensemble list + ctrl_file = all_input_files.get('CTRL') + if ctrl_file in input_files: + # warn and remove control file if found + self.logger.warning(f"Control file found in ensemble list: " + f"{ctrl_file}. Removing from list.") + input_files.remove(ctrl_file) + + # check EnsembleStat number of files + if self.env_var_dict.get('METPLUS_ENS_MEMBER_IDS'): + self.logger.debug('Skipping logic to fill file list with MISSING') + elif not self._check_expected_ensembles(input_files): + input_files = None all_input_files[label] = input_files # return None if no matching input files were found if not all_input_files: - return None, None + return None, offset_time_info return all_input_files, offset_time_info @@ -743,7 +811,7 @@ def _add_files_that_match_time(self, all_input_files, time_info, file_dict, lead for input_key in file_dict: # skip time info key - if input_key == 'time_info': + if input_key == 'time_info' or input_key == 'var_list': continue if input_key not in all_input_files: @@ -784,3 +852,36 @@ def get_list_file_name(self, time_info, identifier): return (f"{self.app_name}_files_{identifier}_" f"init_{init}_valid_{valid}_lead_{lead}.txt") + + def add_to_infiles(self, file_dict, time_info): + for data_type in [item for item in file_dict.keys() if item not in ('var_list', 'time_info')]: + file_list = file_dict.get(data_type) + if not file_list: continue + if data_type == 'CTRL': + ctrl_file = file_list[0] + self.infiles.append(f'-ctrl {ctrl_file}') + continue + if data_type == 'OBS_GRID': + for input_file in file_list: + self.infiles.append(f'-grid_obs {input_file}') + continue + if data_type == 'OBS_POINT': + for input_file in file_list: + self.infiles.append(f'-point_obs {input_file}') + continue + if data_type == 'ENS_MEAN': + for input_file in file_list: + self.infiles.append(f'-ens_mean {input_file}') + continue + + # if there is more than 1 file, create file list file + if not self.c_dict.get('SUPPORTS_FILE_LIST', True): + self.infiles.extend(file_list) + continue + if len(file_list) > 1: + list_filename = self.get_list_file_name(time_info, data_type) + input_file = self.write_list_file(list_filename, file_list) + else: + input_file = file_list[0] + + self.infiles.append(input_file) diff --git a/metplus/wrappers/series_analysis_wrapper.py b/metplus/wrappers/series_analysis_wrapper.py index 1fb1948a6b..4a98e91ef3 100755 --- a/metplus/wrappers/series_analysis_wrapper.py +++ b/metplus/wrappers/series_analysis_wrapper.py @@ -461,7 +461,8 @@ def get_all_files_for_leads(self, input_dict, leads): for lead in leads: current_input_dict['lead'] = lead new_files = self.get_all_files_for_lead(current_input_dict) - all_files.extend(new_files) + self._update_list_with_new_files(new_files, all_files) + return all_files def run_at_time_once(self, time_info, lead_group=None): @@ -1149,12 +1150,3 @@ def _get_times_from_file_list(file_path, templates): if not found: continue yield file_time_info - - def _update_list_with_new_files(self, time_info, list_to_update): - new_files = self.get_files_from_time(time_info) - if not new_files: - return - if isinstance(new_files, list): - list_to_update.extend(new_files) - else: - list_to_update.append(new_files) diff --git a/metplus/wrappers/wavelet_stat_wrapper.py b/metplus/wrappers/wavelet_stat_wrapper.py index 2c56c7408e..6b95778a38 100755 --- a/metplus/wrappers/wavelet_stat_wrapper.py +++ b/metplus/wrappers/wavelet_stat_wrapper.py @@ -76,24 +76,15 @@ def create_c_dict(self): # get the MET config file path or use default 
c_dict['CONFIG_FILE'] = self.get_config_file('WaveletStatConfig_wrapped') - c_dict['OBS_INPUT_DIR'] = self.config.getdir(f'OBS_{app}_INPUT_DIR', '') - c_dict['OBS_INPUT_TEMPLATE'] = ( - self.config.getraw('config', f'OBS_{app}_INPUT_TEMPLATE') - ) - if not c_dict['OBS_INPUT_TEMPLATE']: - self.log_error(f"OBS_{app}_INPUT_TEMPLATE required to run") + self.get_input_templates(c_dict, { + 'FCST': {'prefix': 'FCST_WAVELET_STAT', 'required': True}, + 'OBS': {'prefix': 'OBS_WAVELET_STAT', 'required': True}, + }) c_dict['OBS_INPUT_DATATYPE'] = ( self.config.getstr('config', f'OBS_{app}_INPUT_DATATYPE', '') ) - c_dict['FCST_INPUT_DIR'] = self.config.getdir(f'FCST_{app}_INPUT_DIR', '') - c_dict['FCST_INPUT_TEMPLATE'] = ( - self.config.getraw('config', f'FCST_{app}_INPUT_TEMPLATE') - ) - if not c_dict['FCST_INPUT_TEMPLATE']: - self.log_error(f"FCST_{app}_INPUT_TEMPLATE required to run") - c_dict['FCST_INPUT_DATATYPE'] = ( self.config.getstr('config', f'FCST_{app}_INPUT_DATATYPE', '') ) @@ -176,6 +167,4 @@ def create_c_dict(self): }) self.add_met_config(name='output_prefix', data_type='string') - # skip RuntimeFreq input file logic - remove once integrated - c_dict['FIND_FILES'] = False return c_dict
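Note on the changed error counts in the unit tests: the patched `_check_input_files` in `metplus/wrappers/runtime_freq_wrapper.py` now tallies runs and missing entries from `c_dict['ALL_FILES']` instead of stopping at the first input that cannot be found, so every run with a missing labeled input is counted and the remaining input types are still checked. A rough standalone sketch of that accounting is below; `count_missing_inputs` is a hypothetical helper with a simplified signature for illustration, not a METplus function.

def count_missing_inputs(all_files, allow_missing_inputs, logger):
    """Tally runs and missing inputs roughly the way the patched
    _check_input_files does.

    Each entry of all_files is a dict of labeled input file lists plus
    'var_list' and 'time_info' metadata. An entry counts as missing when it
    is None or when any labeled input is None or contains only 'missing'
    placeholders.
    """
    num_missing = 0
    num_runs = len(all_files) if all_files else 1
    if not all_files:
        num_missing = 1

    for file_dict in all_files or []:
        if file_dict is None:
            num_missing += 1
            continue
        for key, value in file_dict.items():
            if key in ('var_list', 'time_info'):
                continue
            if value is None or all(item == 'missing' for item in value):
                num_missing += 1
                break

    if num_missing:
        msg = 'A problem occurred trying to obtain input files'
        if allow_missing_inputs:
            logger.warning(msg)
        else:
            logger.error(msg)
    return num_runs, num_missing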
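The other shared piece worth calling out for review is `add_to_infiles`, which replaces the per-wrapper input handling (EnsembleStat, GridDiag, and others) with a single mapping from labeled input types to command-line arguments: `-ctrl`, `-point_obs`, `-grid_obs`, `-ens_mean`, or a positional path / file-list file for the remaining inputs. The sketch below illustrates that pattern only; `build_infile_args`, `write_list_file`, and the simplified `file_dict` shape are assumptions made for the example and are not the wrapper code itself.

import os
import tempfile

# flags for the optional EnsembleStat inputs handled by add_to_infiles
FLAGGED_INPUTS = {
    'OBS_GRID': '-grid_obs',
    'OBS_POINT': '-point_obs',
    'ENS_MEAN': '-ens_mean',
}


def write_list_file(filename, file_list):
    """Write a MET-style file list: a header line, then one path per line."""
    path = os.path.join(tempfile.gettempdir(), filename)
    with open(path, 'w') as handle:
        handle.write('file_list\n')
        handle.write('\n'.join(file_list) + '\n')
    return path


def build_infile_args(file_dict, supports_file_list=True):
    """Turn one entry of c_dict['ALL_FILES'] into input arguments.

    'var_list' and 'time_info' carry metadata rather than files, so they are
    skipped, mirroring the checks added throughout the patch.
    """
    infiles = []
    for label, file_list in file_dict.items():
        if label in ('var_list', 'time_info') or not file_list:
            continue
        if label == 'CTRL':
            # only the first control file found is passed to -ctrl
            infiles.append(f'-ctrl {file_list[0]}')
            continue
        if label in FLAGGED_INPUTS:
            flag = FLAGGED_INPUTS[label]
            infiles.extend(f'{flag} {input_file}' for input_file in file_list)
            continue
        # remaining labels (e.g. FCST) are positional: write a file-list file
        # when more than one file was found and the tool supports file lists
        if supports_file_list and len(file_list) > 1:
            infiles.append(write_list_file(f'{label.lower()}_files.txt', file_list))
        else:
            infiles.extend(file_list)
    return infiles


if __name__ == '__main__':
    example = {
        'time_info': {'lead': 43200},
        'var_list': None,
        'FCST': ['fcst_file_F012', 'fcst_file_F012'],
        'OBS_GRID': ['2005080712/obs_file'],
        'CTRL': None,  # a missing optional input is simply skipped
    }
    print(build_infile_args(example))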