Skip to content

Commit

Permalink
Add an archive task to GEFS system to archive files in HPSS (NOAA-EMC…
Browse files Browse the repository at this point in the history
…#2895)

- This task is an extension of the arch job previously merged that
archives files in ROTDIR (NOAA-EMC#2816
AntonMFernando-NOAA@2816c3b)
- This feature adds an archive task to the GEFS system that archives files
to HPSS (HPSSARCH) or to a local archive (LOCALARCH).
  Resolves NOAA-EMC#2698  
  Refs NOAA-EMC#2816 NOAA-EMC#2772 NOAA-EMC#832 
---------

Co-authored-by: David Huber <[email protected]>
  • Loading branch information
AntonMFernando-NOAA and DavidHuber-NOAA authored Sep 25, 2024
1 parent ec63492 commit 7088a91
Show file tree
Hide file tree
Showing 16 changed files with 286 additions and 17 deletions.
86 changes: 86 additions & 0 deletions parm/archive/gefs_extracted_atmos.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Archive list for the GEFS extracted atmosphere GRIB2 products.
# Defines the gefs_atmos dataset: a tar file under ATARDIR/<YYYYMMDDHH>/ whose
# "required" entries are built below for the ensstat directory and for every
# member directory mem000 .. mem<NMEM_ENS>.
{% set cycle_HH = current_cycle | strftime("%H") %}
{% set cycle_YMD = current_cycle | to_YMD %}
{% set cycle_YMDH = current_cycle | to_YMDH %}
# Common file-name prefix: <RUN>.t<HH>z.
{% set head = RUN + ".t" + cycle_HH + "z." %}

gefs_atmos:
name: "GEFS_ATMOS"
target: "{{ ATARDIR }}/{{ cycle_YMDH }}/gefs_atmos.tar"
required:
#select mem%03d and ensstat files required
{% set members = ["ensstat"] %}
{% for mem_nm in range(0, NMEM_ENS + 1) %}
{% do members.append("mem" ~ '%03d' % mem_nm ) %}
{% endfor %}

# First forecast hour to archive: OFFSET_START_HOUR when REPLAY_ICS is set,
# otherwise FHMIN_GFS.
{% if REPLAY_ICS %}
{% set ofst_hr = OFFSET_START_HOUR %}
{% else %}
{% set ofst_hr = FHMIN_GFS %}
{% endif %}

# Visit every member/resolution pair and resolve its COM directory by
# substituting the placeholders below into COM_ATMOS_GRIB_GRID_TMPL.
{% for mem in members %}
{% for res in ['0p25', '0p50', '1p00'] %}
{% set tmpl_dict = ({ '${ROTDIR}':ROTDIR,
'${RUN}':RUN,
'${YMD}':cycle_YMD,
'${HH}':cycle_HH,
'${GRID}':res,
'${MEMDIR}':mem }) %}

{% set COMIN_ATMOS_GRIB = COM_ATMOS_GRIB_GRID_TMPL | replace_tmpl(tmpl_dict) %}

# Select pgrb and grib files to copy to the atardir
# Files are listed only when path_exists(COMIN_ATMOS_GRIB) is true.
# Per forecast hour, ensstat contributes one mean.pres_ file and every other
# member contributes a pgrb2 and a pgrb2b file. Each path is passed through
# the relpath(ROTDIR) filter before being written.
{% if path_exists(COMIN_ATMOS_GRIB) %}
{% if FHMAX_HF_GFS == 0 %}
# FHMAX_HF_GFS is 0: every resolution is listed for hours starting at ofst_hr,
# stepping by FHOUT_GFS, with an exclusive range end of FHMAX_GFS + FHOUT_GFS.
{% for fhr in range(ofst_hr, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS) %}
{% if mem=="ensstat" %}
{% set file_name = head ~ "mean.pres_." ~ res ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2" %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% else %}
{% set file_name = head ~ "pgrb2." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% set file_name = head ~ "pgrb2b." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endif %}
{% endfor %}
{% else %}
# FHMAX_HF_GFS is non-zero: 0p25 is listed from ofst_hr in FHOUT_HF_GFS steps
# (exclusive end FHMAX_HF_GFS + FHOUT_HF_GFS); 0p50 is listed from
# FHMAX_HF_GFS + FHOUT_GFS in FHOUT_GFS steps (exclusive end
# FHMAX_GFS + FHOUT_GFS); nothing is listed for 1p00 in this branch.
{% if res == "0p25" %}
{% for fhr in range(ofst_hr, FHMAX_HF_GFS + FHOUT_HF_GFS, FHOUT_HF_GFS) %}
{% if mem=="ensstat" %}
{% set file_name = head ~ "mean.pres_." ~ res ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2" %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% else %}
{% set file_name = head ~ "pgrb2." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% set file_name = head ~ "pgrb2b." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endif %}
{% endfor %}
{% endif %}
{% if res == "0p50" %}
{% for fhr in range(FHMAX_HF_GFS + FHOUT_GFS, FHMAX_GFS + FHOUT_GFS, FHOUT_GFS) %}
{% if mem=="ensstat" %}
{% set file_name = head ~ "mean.pres_." ~ res ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2" %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% else %}
{% set file_name = head ~ "pgrb2." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% set file_name = head ~ "pgrb2b." ~ res ~ ".f" ~ '%03d'|format(fhr) %}
{% set file_path = COMIN_ATMOS_GRIB ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endif %}
{% endfor %}
{% endif %}
{% endif %}
{% endif %}
{% endfor %}
{% endfor %}
33 changes: 33 additions & 0 deletions parm/archive/gefs_extracted_ice.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Archive list for the GEFS extracted sea-ice NetCDF products.
# Defines the gefs_ice dataset: a tar file under ATARDIR/<YYYYMMDDHH>/ whose
# "required" entries are built below for every member directory
# mem000 .. mem<NMEM_ENS>.
{% set cycle_HH = current_cycle | strftime("%H") %}
{% set cycle_YMD = current_cycle | to_YMD %}
{% set cycle_YMDH = current_cycle | to_YMDH %}
# Common file-name prefix: <RUN>.ice.t<HH>z.
{% set head = RUN + ".ice.t" + cycle_HH + "z." %}

gefs_ice:
name: "GEFS_ICE"
target: "{{ ATARDIR }}/{{ cycle_YMDH }}/gefs_ice.tar"
required:
# Select the mem%03d directories (the list starts empty: no ensstat entry here)
{% set members = [] %}
{% for mem_nm in range(0, NMEM_ENS + 1) %}
{% do members.append("mem" ~ '%03d' % mem_nm ) %}
{% endfor %}

# Resolve each member's COM directory by substituting the placeholders below
# into COM_ICE_HISTORY_TMPL.
{% for mem in members %}
{% set tmpl_dict = ({ '${ROTDIR}':ROTDIR,
'${RUN}':RUN,
'${YMD}':cycle_YMD,
'${HH}':cycle_HH,
'${MEMDIR}':mem }) %}

{% set COMIN_ICE_HISTORY = COM_ICE_HISTORY_TMPL | replace_tmpl(tmpl_dict) %}

# Select netcdf files to copy to the atardir
# Files are listed only when path_exists(COMIN_ICE_HISTORY) is true. Hours start
# one FHOUT_ICE_GFS step after FHMIN_GFS and advance by FHOUT_ICE_GFS, with an
# exclusive range end of FHMAX_GFS + FHOUT_ICE_GFS. File names have the form
# <head><FHOUT_ICE_GFS>hr_avg.fFFF.nc and each path is passed through the
# relpath(ROTDIR) filter before being written.
{% if path_exists(COMIN_ICE_HISTORY) %}
{% for fhr in range(FHMIN_GFS + FHOUT_ICE_GFS, FHMAX_GFS + FHOUT_ICE_GFS, FHOUT_ICE_GFS) %}
{% set file_name = head ~ FHOUT_ICE_GFS ~ "hr_avg" ~ ".f" ~ '%03d'|format(fhr) ~ ".nc" %}
{% set file_path = COMIN_ICE_HISTORY ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endfor %}
{% endif %}
{% endfor %}
40 changes: 40 additions & 0 deletions parm/archive/gefs_extracted_ocean.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Archive list for the GEFS extracted ocean NetCDF products.
# Defines the gefs_ocean dataset: a tar file under ATARDIR/<YYYYMMDDHH>/ whose
# "required" entries are built below for every member directory
# mem000 .. mem<NMEM_ENS>.
{% set cycle_HH = current_cycle | strftime("%H") %}
{% set cycle_YMD = current_cycle | to_YMD %}
{% set cycle_YMDH = current_cycle | to_YMDH %}
# Common file-name prefix: <RUN>.ocean.t<HH>z.
{% set head = RUN + ".ocean.t" + cycle_HH + "z." %}

gefs_ocean:
name: "GEFS_OCEAN"
target: "{{ ATARDIR }}/{{ cycle_YMDH }}/gefs_ocean.tar"
required:
# Select the mem%03d directories (the list starts empty: no ensstat entry here)
{% set members = [] %}
{% for mem_nm in range(0, NMEM_ENS + 1) %}
{% do members.append("mem" ~ '%03d' % mem_nm ) %}
{% endfor %}

# Grid label used in the directory and file names: "1p00" when OCNRES equals
# the string "025"; otherwise the first character of OCNRES, then "p", then its
# last two characters (for example "500" gives "5p00").
# NOTE(review): the comparison is against a string while the other branch casts
# OCNRES with |string(); if OCNRES arrives as an integer the first branch never
# matches. Confirm the type supplied by the caller.
{% if OCNRES == "025" %}
{% set res = "1p00" %}
{% else %}
{% set res = (OCNRES|string())[0] ~ "p" ~ (OCNRES|string())[-2:] %}
{% endif %}

# Resolve each member's COM directory by substituting the placeholders below
# into COM_OCEAN_NETCDF_TMPL.
{% for mem in members %}
{% set tmpl_dict = ({ '${ROTDIR}':ROTDIR,
'${RUN}':RUN,
'${YMD}':cycle_YMD,
'${HH}':cycle_HH,
'${MEMDIR}':mem }) %}

{% set COMIN_OCEAN_NETCDF = COM_OCEAN_NETCDF_TMPL | replace_tmpl(tmpl_dict) %}

# Select netcdf files to copy to the atardir
# Files live in the <res> subdirectory and are listed only when
# path_exists(netcdf_grid_dir) is true. Hours start one FHOUT_OCN_GFS step after
# FHMIN_GFS and advance by FHOUT_OCN_GFS, with an exclusive range end of
# FHMAX_GFS + FHOUT_OCN_GFS. File names have the form <head><res>.fFFF.nc and
# each path is passed through the relpath(ROTDIR) filter before being written.
{% set netcdf_grid_dir = COMIN_OCEAN_NETCDF ~ "/" ~ res %}
{% if path_exists(netcdf_grid_dir) %}
{% for fhr in range(FHMIN_GFS + FHOUT_OCN_GFS, FHMAX_GFS + FHOUT_OCN_GFS, FHOUT_OCN_GFS) %}
{% set file_name = head ~ res ~ ".f" ~ '%03d'|format(fhr) ~ ".nc" %}
{% set file_path = netcdf_grid_dir ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endfor %}
{% endif %}
{% endfor %}
51 changes: 51 additions & 0 deletions parm/archive/gefs_extracted_wave.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Archive list for the GEFS extracted wave products.
# Defines the gefs_wave dataset: a tar file under ATARDIR/<YYYYMMDDHH>/ whose
# "required" entries are the gridded grib2 files and the station tar files of
# every member directory mem000 .. mem<NMEM_ENS>.
{% set cycle_HH = current_cycle | strftime("%H") %}
{% set cycle_YMD = current_cycle | to_YMD %}
{% set cycle_YMDH = current_cycle | to_YMDH %}
# Common file-name prefix: <RUN>wave.t<HH>z. (no separator between RUN and "wave")
{% set head = RUN + "wave.t" + cycle_HH + "z." %}

gefs_wave:
name: "GEFS_WAVE"
target: "{{ ATARDIR }}/{{ cycle_YMDH }}/gefs_wave.tar"
required:
# First forecast hour to archive: OFFSET_START_HOUR when REPLAY_ICS is set,
# otherwise FHMIN_GFS.
{% if REPLAY_ICS %}
{% set ofst_hr = OFFSET_START_HOUR %}
{% else %}
{% set ofst_hr = FHMIN_GFS %}
{% endif %}

# Grid label built from the last three characters of waveGRD: the first of
# them, then "p", then the remaining two (a waveGRD ending in "025" gives "0p25").
{% set res = (waveGRD[-3:])[0] ~ "p" ~ (waveGRD[-3:])[-2:] %}

# Select the mem%03d directories (the list starts empty: no ensstat entry here)
{% set members = [] %}
{% for mem_nm in range(0, NMEM_ENS + 1) %}
{% do members.append("mem" ~ '%03d' % mem_nm ) %}
{% endfor %}

# The same placeholder dictionary is substituted into both
# COM_WAVE_GRID_TMPL and COM_WAVE_STATION_TMPL for each member.
{% for mem in members %}
{% set tmpl_dict = ({ '${ROTDIR}':ROTDIR,
'${RUN}':RUN,
'${YMD}':cycle_YMD,
'${HH}':cycle_HH,
'${MEMDIR}':mem }) %}

{% set COMIN_WAVE_GRID = COM_WAVE_GRID_TMPL | replace_tmpl(tmpl_dict) %}
# Select grib2 files to copy to the atardir
# Listed only when path_exists(COMIN_WAVE_GRID) is true. Hours start at ofst_hr
# and advance by FHOUT_WAV, with an exclusive range end of FHMAX_GFS + FHOUT_WAV.
# File names have the form <head>global.<res>.fFFF.grib2.
{% if path_exists(COMIN_WAVE_GRID) %}
{% for fhr in range(ofst_hr, FHMAX_GFS + FHOUT_WAV, FHOUT_WAV) %}
{% set file_name = head ~ "global." ~ res ~ ".f" ~ '%03d'|format(fhr) ~ ".grib2" %}
{% set file_path = COMIN_WAVE_GRID ~ "/" ~ file_name %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endfor %}
{% endif %}

{% set COMIN_WAVE_STATION = COM_WAVE_STATION_TMPL | replace_tmpl(tmpl_dict) %}
# Select station files to copy to the atardir
# Listed only when path_exists(COMIN_WAVE_STATION) is true. Three fixed files
# are added per member: spec_tar.gz, cbull_tar and bull_tar. Their names rebuild
# the same <RUN>wave.t<HH>z. prefix that head already holds.
{% if path_exists(COMIN_WAVE_STATION) %}
{% set file_path = COMIN_WAVE_STATION ~ "/" ~ RUN ~ "wave.t" ~ cycle_HH ~ "z.spec_tar.gz" %}
- "{{ file_path | relpath(ROTDIR)}}"
{% set file_path = COMIN_WAVE_STATION ~ "/" ~ RUN ~ "wave.t" ~ cycle_HH ~ "z.cbull_tar" %}
- "{{ file_path | relpath(ROTDIR)}}"
{% set file_path = COMIN_WAVE_STATION ~ "/" ~ RUN ~ "wave.t" ~ cycle_HH ~ "z.bull_tar" %}
- "{{ file_path | relpath(ROTDIR)}}"
{% endif %}
{% endfor %}
12 changes: 12 additions & 0 deletions parm/archive/master_gefs.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Master archive template for GEFS: renders a single "datasets" mapping by
# including the atmos, ocean, ice and wave templates.
# Set variables/lists needed to parse the gefs templates
{% set cycle_HH = current_cycle | strftime("%H") %}
{% set cycle_YMD = current_cycle | to_YMD %}
{% set cycle_YMDH = current_cycle | to_YMDH %}

datasets:
# The output of every included template passes through the indent filter
# (width=4) so that each dataset is nested under the datasets key.
{% filter indent(width=4) %}
{% include "gefs_extracted_atmos.yaml.j2" %}
{% include "gefs_extracted_ocean.yaml.j2" %}
{% include "gefs_extracted_ice.yaml.j2" %}
{% include "gefs_extracted_wave.yaml.j2" %}
{% endfilter %}
1 change: 0 additions & 1 deletion parm/config/gefs/yaml/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ base:
FCST_BREAKPOINTS: "48"
REPLAY_ICS: "NO"
USE_OCN_PERTURB_FILES: "false"
HPSSARCH: "NO"
LOCALARCH: "NO"
11 changes: 6 additions & 5 deletions parm/config/gfs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ echo "BEGIN: config.resources"
case ${machine} in
"WCOSS2")
max_tasks_per_node=128
# WCOSS2 nodes have 512GB of RAM, but only 500GB are reservable
# shellcheck disable=SC2034
mem_node_max="500GB"
;;
Expand All @@ -54,12 +55,12 @@ case ${machine} in
"ORION")
max_tasks_per_node=40
# shellcheck disable=SC2034
mem_node_max="192GB"
mem_node_max="180GB"
;;
"HERCULES")
max_tasks_per_node=80
# shellcheck disable=SC2034
mem_node_max="512GB"
mem_node_max="500GB"
;;
"JET")
case ${PARTITION_BATCH} in
Expand Down Expand Up @@ -145,10 +146,10 @@ export max_tasks_per_node
case ${step} in
"prep")
walltime='00:30:00'
ntasks=4
tasks_per_node=2
ntasks=14
tasks_per_node=14
threads_per_task=1
memory="40GB"
memory="${mem_node_max}"
;;

"prepsnowobs")
Expand Down
5 changes: 5 additions & 0 deletions parm/config/gfs/config.resources.GAEA
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
# Gaea-specific job resources

case ${step} in
"prep")
# Run on two nodes (requires ~400GB total)
tasks_per_node=7
;;

"eobs")
# The number of tasks and cores used must be the same for eobs
# See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details
Expand Down
5 changes: 5 additions & 0 deletions parm/config/gfs/config.resources.HERA
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
# Hera-specific job resources

case ${step} in
"prep")
# Run on 7 nodes for memory requirement
tasks_per_node=2
;;

"anal")
if [[ "${CASE}" == "C384" ]]; then
export ntasks_gdas=270
Expand Down
5 changes: 5 additions & 0 deletions parm/config/gfs/config.resources.JET
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
# Jet-specific job resources

case ${step} in
"prep")
# Run on 7 nodes for memory requirement
tasks_per_node=2
;;

"anal")
if [[ "${CASE}" == "C384" ]]; then
export ntasks=270
Expand Down
6 changes: 6 additions & 0 deletions parm/config/gfs/config.resources.ORION
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
# Orion-specific job resources

case ${step} in
"prep")
# Run on 2 nodes for memory requirement
# This may not be enough and may need to run on more nodes.
export tasks_per_node=7
;;

"anal")
# TODO:
# On Orion, after Rocky 9 upgrade, GSI performance is degraded.
Expand Down
6 changes: 6 additions & 0 deletions parm/config/gfs/config.resources.S4
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
# S4-specific job resources

case ${step} in
"prep")
# Run on two nodes for memory requirement
# This may not be enough memory. Decrease tasks/node to 2 if necessary.
tasks_per_node=7
;;

"anal")
case ${CASE} in
"C384")
Expand Down
5 changes: 3 additions & 2 deletions scripts/exglobal_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ def main():
'FHOUT_HF_WAV', 'FHMAX_WAV', 'FHMAX_HF_WAV', 'FHMAX_WAV_GFS',
'restart_interval_gdas', 'restart_interval_gfs',
'AERO_ANL_RUN', 'AERO_FCST_RUN', 'DOIBP_WAV', 'DO_JEDIOCNVAR',
'NMEM_ENS', 'DO_JEDIATMVAR', 'DO_VRFY_OCEANDA', 'FHMAX_FITS',
'IAUFHRS', 'DO_FIT2OBS', 'NET']
'NMEM_ENS', 'DO_JEDIATMVAR', 'DO_VRFY_OCEANDA', 'FHMAX_FITS', 'waveGRD',
'IAUFHRS', 'DO_FIT2OBS', 'NET', 'FHOUT_HF_GFS', 'FHMAX_HF_GFS', 'REPLAY_ICS',
'OFFSET_START_HOUR']

archive_dict = AttrDict()
for key in keys:
Expand Down
3 changes: 0 additions & 3 deletions ush/python/pygfs/task/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,6 @@ def configure(self, arch_dict: Dict[str, Any]) -> (Dict[str, Any], List[Dict[str
self.tar_cmd = ""
return arcdir_set, []

if arch_dict.NET == "gefs":
raise NotImplementedError("GEFS archiving is not yet implemented!")

master_yaml = "master_" + arch_dict.RUN + ".yaml.j2"

parsed_sets = parse_j2yaml(os.path.join(archive_parm, master_yaml),
Expand Down
4 changes: 2 additions & 2 deletions workflow/applications/gefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def get_task_names(self):
tasks += ['wavepostpnt']

if self.do_extractvars:
tasks += ['extractvars']
tasks += ['extractvars', 'arch']

tasks += ['arch', 'cleanup']
tasks += ['cleanup']

return {f"{self.run}": tasks}
Loading

0 comments on commit 7088a91

Please sign in to comment.