
Commit

Made component flags apply to timeseries, renamed components to short names to match timeseries, removed -all arg, other code cleanup
rmshkv committed May 3, 2024
1 parent bdfd551 commit d0b47e6
Showing 13 changed files with 51 additions and 118 deletions.
3 changes: 1 addition & 2 deletions README.md
@@ -82,7 +82,6 @@ Usage: cupid-run [OPTIONS] CONFIG_PATH
Options:
-s, --serial Do not use LocalCluster objects
-ts, --time-series Run time series generation scripts prior to diagnostics
-a, --all Run all component diagnostics
-atm, --atmosphere Run atmosphere component diagnostics
-ocn, --ocean Run ocean component diagnostics
-lnd, --land Run land component diagnostics
@@ -108,7 +107,7 @@ client

#### Specifying components

If no component flags are provided, `--all` will be assumed and all component diagnostics listed in `config.yml` will be executed. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice config.yml`.
If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice config.yml`.
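
A few illustrative invocations combining these flags (a usage sketch, not part of the diff; `config.yml` stands in for any config path):

```
# Ocean and sea ice diagnostics only
cupid-run -ocn -ice config.yml

# Build time series first, then run land diagnostics without LocalCluster objects
cupid-run -s -ts -lnd config.yml

# No component flags: run every component listed in config.yml
cupid-run config.yml
```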


### Timeseries File Generation
112 changes: 45 additions & 67 deletions cupid/run.py
@@ -20,7 +20,6 @@
@click.option("--time-series", "-ts", is_flag=True,
help="Run time series generation scripts prior to diagnostics")
# Options to turn components on or off
@click.option("--all", "-a", is_flag=True, help="Run all component diagnostics")
@click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component diagnostics")
@click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component diagnostics")
@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics")
@@ -38,7 +37,20 @@ def run(config_path, serial=False, time_series=False,
control = cupid.util.get_control_dict(config_path)
cupid.util.setup_book(config_path)

#####################################################################
component_options = {"atm": atmosphere,
"ocn": ocean,
"lnd": land,
"ice": seaice,
"glc": landice}

# Automatically run all if no components specified

if True not in [atmosphere, ocean, land, seaice, landice]:
all = True
for key in component_options.keys():
component_options[key] = True

#####################################################################
# Managing global parameters

global_params = dict()
Expand All @@ -49,33 +61,32 @@ def run(config_path, serial=False, time_series=False,
global_params['serial'] = serial

####################################################################

if time_series:
timeseries_params = control["timeseries"]

# general timeseries arguments for all components
num_procs = timeseries_params["num_procs"]



for component in ['atm', 'ocn', 'lnd', 'ice', 'glc']:
cupid.timeseries.create_time_series(
component,
timeseries_params[component]["vars"],
timeseries_params[component]["derive_vars"],
[timeseries_params["case_name"]], # could also grab from compute_notebooks section of config file
timeseries_params[component]["hist_str"],
[global_params["CESM_output_dir"] + "/" + timeseries_params["case_name"] + f"/{component}/hist/"], # could also grab from compute_notebooks section of config file
[global_params["CESM_output_dir"]+'/'+timeseries_params['case_name']+f'/{component}/proc/tseries/'],
# Note that timeseries output will eventually go in /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/
timeseries_params["ts_done"],
timeseries_params["overwrite_ts"],
timeseries_params[component]["start_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.start_date
timeseries_params[component]["end_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.end_date
timeseries_params[component]["level"],
num_procs,
serial,
)
for component, comp_bool in component_options.items():
if comp_bool:
cupid.timeseries.create_time_series(
component,
timeseries_params[component]["vars"],
timeseries_params[component]["derive_vars"],
[timeseries_params["case_name"]], # could also grab from compute_notebooks section of config file
timeseries_params[component]["hist_str"],
[global_params["CESM_output_dir"] + "/" + timeseries_params["case_name"] + f"/{component}/hist/"], # could also grab from compute_notebooks section of config file
[global_params["CESM_output_dir"]+'/'+timeseries_params['case_name']+f'/{component}/proc/tseries/'],
# Note that timeseries output will eventually go in /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/
timeseries_params["ts_done"],
timeseries_params["overwrite_ts"],
timeseries_params[component]["start_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.start_date
timeseries_params[component]["end_years"], # could get from yaml file in adf_quick_run.parameter_groups.none.config_fil_str, or for other notebooks config files, eg ocean_surface.parameter_gropus.none.mom6_tools_config.end_date
timeseries_params[component]["level"],
num_procs,
serial,
)

# Grab paths

@@ -122,12 +133,6 @@
#####################################################################
# Organizing notebooks to run

component_options = {"atmosphere": atmosphere,
"ocean": ocean,
"land": land,
"seaice": seaice,
"landice": landice}

if 'compute_notebooks' in control:

all_nbs = dict()
@@ -136,29 +141,15 @@
all_nbs[nb] = info
all_nbs[nb]['nb_path_root'] = nb_path_root + '/infrastructure'
all_nbs[nb]['output_dir'] = output_dir + '/infrastructure'

# Automatically run all if not specified

if True not in [atmosphere, ocean, land, seaice, landice]:
all = True

if all:
for comp_name, comp_nbs in control["compute_notebooks"].items():
for nb, info in comp_nbs.items():

for comp_name, comp_bool in component_options.items():
if comp_name in control['compute_notebooks'] and comp_bool:
for nb, info in control['compute_notebooks'][comp_name].items():
all_nbs[nb] = info
all_nbs[nb]['nb_path_root'] = nb_path_root + '/' + comp_name
all_nbs[nb]['output_dir'] = output_dir + '/' + comp_name

else:
for comp_name, comp_bool in component_options.items():
if comp_bool:
if comp_name in control['compute_notebooks']:
for nb, info in control['compute_notebooks'][comp_name].items():
all_nbs[nb] = info
all_nbs[nb]['nb_path_root'] = nb_path_root + '/' + comp_name
all_nbs[nb]['output_dir'] = output_dir + '/' + comp_name
else:
warnings.warn(f"No notebooks for {comp_name} component specified in config file.")
elif comp_bool and not all:
warnings.warn(f"No notebooks for {comp_name} component specified in config file.")

# Checking for existence of environments

@@ -181,26 +172,13 @@

all_scripts = dict()

# Automatically run all if not specified

if True not in [atmosphere, ocean, land, seaice, landice]:
all = True

if all:
for comp_name, comp_scripts in control["compute_scripts"].items():
for script, info in comp_scripts.items():
for comp_name, comp_bool in component_options.items():
if comp_name in control['compute_scripts'] and comp_bool:
for script, info in control['compute_scripts'][comp_name].items():
all_scripts[script] = info
all_scripts[script]['nb_path_root'] = nb_path_root + '/' + comp_name

else:
for comp_name, comp_bool in component_options.items():
if comp_bool:
if comp_name in control['compute_scripts']:
for script, info in control['compute_scripts'][comp_name].items():
all_scripts[script] = info
all_scripts[script]['nb_path_root'] = nb_path_root + '/' + comp_name
else:
warnings.warn(f"No scripts for {comp_name} component specified in config file.")
elif comp_bool and not all:
warnings.warn(f"No scripts for {comp_name} component specified in config file.")

# Checking for existence of environments

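For reference, a minimal standalone sketch of the component-selection logic that the run.py changes above introduce (the short names and default-to-all behaviour follow the diff; the helper function name is illustrative):

```python
def select_components(atmosphere=False, ocean=False, land=False,
                      seaice=False, landice=False):
    """Map CLI flags to the short component names used in config.yml.

    When no flag is passed, every component is enabled, which replaces
    the removed --all option.
    """
    component_options = {"atm": atmosphere,
                         "ocn": ocean,
                         "lnd": land,
                         "ice": seaice,
                         "glc": landice}
    if True not in component_options.values():
        # No component flags given: run everything.
        component_options = {key: True for key in component_options}
    return component_options


# `cupid-run -ocn -ice config.yml` selects only the ocean and sea ice work:
print(select_components(ocean=True, seaice=True))
# {'atm': False, 'ocn': True, 'lnd': False, 'ice': True, 'glc': False}

# `cupid-run config.yml` (no component flags) enables everything:
print(select_components())
# {'atm': True, 'ocn': True, 'lnd': True, 'ice': True, 'glc': True}
```

This mirrors how `component_options` then gates both `cupid.timeseries.create_time_series` and the notebook/script collection in `run()`.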
46 changes: 1 addition & 45 deletions cupid/util.py
@@ -104,52 +104,8 @@ def setup_book(config_path):
with open(f"{output_dir}/_config.yml", "w") as fid:
yaml.dump(config, fid, sort_keys=False)

# get list of computational notebooks
return None

# if 'compute_notebooks' in control:

# nb_path_root = os.path.realpath(os.path.expanduser(control['data_sources']['nb_path_root']))
# # the below won't work for index, unless we put it in an infrastructure folder and change that elsewhere
# compute_notebooks = [f"{nb_path_root}/{ok}/{ik}.ipynb" for ok, ov in control["compute_notebooks"].items() for ik, iv in ov.items()]

# # get toc files; ignore glob expressions
# toc_files = get_toc_files(nb_path_root, toc, include_glob=False)
# copy_files = list(set(toc_files) - set(compute_notebooks))

# for src in copy_files:
# #shutil.copyfile(src, f"{output_dir}/{src}")
# pass


# def get_toc_files(nb_path_root, toc_dict, include_glob=True):
# """return a list of files in the _toc.yml"""

# def _toc_files(toc_dict, file_list=[]):
# for key, value in toc_dict.items():

# if key in ["root", "file", "glob"]:
# if not include_glob and key == "glob":
# continue
# if key == "glob":
# file = glob(f"{nb_path_root}/{value}")
# else:
# file = [f"{nb_path_root}/{value}.{ext}" for ext in ["ipynb", "md"] if os.path.exists(f"{nb_path_root}/{value}.{ext}")]

# assert len(file), f"no files found: {value}"
# assert len(file) == 1, f"multiple files found: {value}"
# file_list.append(file[0])

# elif key in ["chapters", "sections", "parts"]:
# file_list_ext = []
# for sub in value:
# file_list_ext = _toc_files(sub, file_list_ext)
# file_list.extend(file_list_ext)

# return file_list

# return _toc_files(toc_dict)


def create_ploomber_nb_task(nb, info, cat_path, nb_path_root, output_dir, global_params, dag, dependency=None):
"""
Creates a ploomber task for running a notebook, including necessary parameters.
8 changes: 4 additions & 4 deletions examples/coupled_model/config.yml
@@ -107,15 +107,15 @@ compute_notebooks:
parameter_groups:
none: {}

atmosphere:
atm:
adf_quick_run:
parameter_groups:
none:
adf_path: ../../../externals/ADF
config_path: .
config_fil_str: "config_f.cam6_3_119.FLTHIST_ne30.r328_gamma0.33_soae.001.yaml"

ocean:
ocn:
ocean_surface:
parameter_groups:
none:
@@ -129,7 +129,7 @@
static: 'mom6.h.static.nc'
oce_cat: /glade/u/home/gmarques/libs/oce-catalogs/reference-datasets.yml

land:
lnd:
land_comparison:
parameter_groups:
none:
@@ -140,7 +140,7 @@
- 1850pAD
- 1850pSASU

seaice:
ice:
seaice:
parameter_groups:
none:
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
