Skip to content

Commit

Permalink
Have the driver script process both python scripts and YAMLs + bufr2i…
Browse files Browse the repository at this point in the history
…oda.x (#730)

This PR allows for all obtypes to be processed in two loops.

We still need to figure out a way to process these in parallel
(somewhat).
  • Loading branch information
CoryMartin-NOAA authored Nov 15, 2023
1 parent 6e6a480 commit 5c305aa
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 47 deletions.
36 changes: 36 additions & 0 deletions parm/ioda/bufr2ioda/atms_beamwidth.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# LOC = data/preproc/atms_beamwidth.dat
# WMO Satellite ID
224
# Version identifier
1
# Sampling distance (deg)
1.11
# Number of channels to modify
22
# Channel, beam width, new width, cutoff, nxaverage, nyaverage, QC dist
# Note: to use FFT technique, set new width and cutoff as required and set
# nxaverage and nyaverage to 0. To use simple averaging set new width to 0.0
# and use nxaverage and nyaverage (e.g. 3 3 for 3x3 averaging).
# QC Dist gives number of points around missing value to flag as missing.
1 5.2 3.33 0.4 0 0 11
2 5.2 3.33 0.4 0 0 11
3 2.2 3.33 0.0 0 0 5
4 2.2 3.33 0.0 0 0 5
5 2.2 3.33 0.0 0 0 5
6 2.2 3.33 0.0 0 0 5
7 2.2 3.33 0.0 0 0 5
8 2.2 3.33 0.0 0 0 5
9 2.2 3.33 0.0 0 0 5
10 2.2 3.33 0.0 0 0 5
11 2.2 3.33 0.0 0 0 5
12 2.2 3.33 0.0 0 0 5
13 2.2 3.33 0.0 0 0 5
14 2.2 3.33 0.0 0 0 5
15 2.2 3.33 0.0 0 0 5
16 2.2 3.33 0.0 0 0 5
17 1.1 3.33 0.0 0 0 5
18 1.1 3.33 0.0 0 0 5
19 1.1 3.33 0.0 0 0 5
20 1.1 3.33 0.0 0 0 5
21 1.1 3.33 0.0 0 0 5
22 1.1 3.33 0.0 0 0 5
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
observations:
- obs space:
name: bufr
obsdatain: "$(BUFR_in)"
obsdatain: "{{ DMPDIR }}/{{ RUN }}.{{ PDY }}/{{ cyc }}/atmos/{{ RUN }}.t{{ cyc }}z.atms.tm00.bufr_d"

exports:
variables:
Expand Down Expand Up @@ -51,9 +51,9 @@ observations:
sensorScanAngle:
sensorScanAngle:
fieldOfViewNumber: "*/FOVN"
heightOfStation: "*/HMSL"
scanStart: -52.725
scanStep: 1.110
sensor: atms

sensorChannelNumber:
query: "*/ATMSCH/CHNM"
Expand All @@ -80,10 +80,11 @@ observations:
map:
_224: npp
_225: n20
# _226: n21

ioda:
backend: netcdf
obsdataout: "$(IODA_out)"
obsdataout: "{{ COM_OBS }}/{{ RUN }}.t{{ cyc }}z.atms.$(splitvar).tm00.nc"

dimensions:
- name: Channel
Expand Down
60 changes: 19 additions & 41 deletions ush/ioda/bufr2ioda/gen_bufr2ioda_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,52 +5,30 @@
# and certain configuration parameters
import argparse
import os
from wxflow import Template, TemplateConstants, YAMLFile
from wxflow import Logger, parse_j2yaml, cast_strdict_as_dtypedict, save_as_yaml
from wxflow import Template, TemplateConstants

# initialize root logger
logger = Logger('gen_bufr2ioda_yaml.py', level='INFO', colored_log=True)

# list of satellite radiance BUFR files that need to be split by SatId
sat_list = [
'atms',
'1bamua',
'1bmhs',
'crisf4',
'iasidb',
]


def gen_bufr_yaml(config):
    """Fill in a BUFR converter YAML template and save the result.

    ``config`` is indexed with the keys 'template yaml', 'obtype', 'run',
    'cyc', 'output dir', 'input file' and 'output yaml file'. The template
    is loaded, its ``BUFR_in`` / ``IODA_out`` placeholders are substituted,
    and the filled-in document is saved to ``config['output yaml file']``.
    """
    # load the templated YAML document
    bufr_yaml = YAMLFile(path=config['template yaml'])

    # observation types listed in sat_list get a per-satellite split token
    # appended to the name used in the output file
    obtype = config['obtype']
    obtype_out = f"{obtype}_{{splits/satId}}" if obtype in sat_list else obtype

    # output IODA file: <run>.t<cyc>z.<obtype_out>.nc under the output dir
    ioda_name = '.'.join((
        config['run'],
        f"t{config['cyc']:02}z",
        obtype_out,
        'nc',
    ))
    ioda_path = os.path.join(config['output dir'], ioda_name)

    # placeholder name -> replacement value
    replacements = {
        'BUFR_in': config['input file'],
        'IODA_out': ioda_path,
    }

    # substitute the placeholders throughout the document
    bufr_yaml = Template.substitute_structure(
        bufr_yaml, TemplateConstants.DOLLAR_PARENTHESES, replacements.get)

    # write out the BUFR converter YAML file
    bufr_yaml.save(config['output yaml file'])
def gen_bufr_yaml(config, template, output):
    """Render a templated bufr2ioda YAML file and write it to ``output``.

    ``template`` is parsed with ``parse_j2yaml`` using ``config`` as the
    data source. A second substitution pass then replaces the ``splitvar``
    placeholder with the literal ``{splits/satId}`` token before the result
    is saved with ``save_as_yaml``.
    """
    logger.info(f"Using {template} as input")

    # first pass: parse the template against the supplied config
    rendered = parse_j2yaml(template, config)

    # second pass: the split token is filled in separately from the parse above
    split_tokens = {'splitvar': '{splits/satId}'}
    rendered = Template.substitute_structure(
        rendered,
        TemplateConstants.DOLLAR_PARENTHESES,
        split_tokens.get,
    )

    save_as_yaml(rendered, output)
    logger.info(f"Wrote to {output}")


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', type=str, help='Input YAML Configuration', required=True)
parser.add_argument('-t', '--template', type=str, help='Input YAML template', required=True)
parser.add_argument('-o', '--output', type=str, help='Output YAML file', required=True)
args = parser.parse_args()
config = YAMLFile(path=args.config)
gen_bufr_yaml(config)
# get the config from your environment
config = cast_strdict_as_dtypedict(os.environ)
# call the parsing function
gen_bufr_yaml(config, args.template, args.output)
39 changes: 36 additions & 3 deletions ush/ioda/bufr2ioda/run_bufr2ioda.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import argparse
import glob
import os
import shutil
from pathlib import Path
from gen_bufr2ioda_json import gen_bufr_json
from gen_bufr2ioda_yaml import gen_bufr_yaml
from wxflow import (Logger, Executable, cast_as_dtype, logit,
to_datetime, datetime_to_YMDH, Task, rm_p)

Expand All @@ -18,6 +20,7 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
# Get gdasapp root directory
DIR_ROOT = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../.."))
USH_IODA = os.path.join(DIR_ROOT, "ush", "ioda", "bufr2ioda")
BIN_GDAS = os.path.join(DIR_ROOT, "build", "bin")

# Create output directory if it doesn't exist
os.makedirs(COM_OBS, exist_ok=True)
Expand All @@ -27,14 +30,21 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
'RUN': RUN,
'current_cycle': current_cycle,
'DMPDIR': DMPDIR,
'COM_OBS': COM_OBS
'COM_OBS': COM_OBS,
'PDY': current_cycle.strftime('%Y%m%d'),
'cyc': current_cycle.strftime('%H'),
}

# copy necessary fix files to runtime directory
shutil.copy(os.path.join(config_template_dir, "atms_beamwidth.txt"),
os.path.join(os.getcwd(), "atms_beamwidth.txt"))

# Specify observation types to be processed by a script
BUFR_py_files = glob.glob(os.path.join(USH_IODA, 'bufr2ioda_*.py'))
BUFR_py_files = [os.path.basename(f) for f in BUFR_py_files]
BUFR_py = [f.replace('bufr2ioda_', '').replace('.py', '') for f in BUFR_py_files]

# TODO: determine how to parallelize these loops
for obtype in BUFR_py:
logger.info(f"Convert {obtype}...")
json_output_file = os.path.join(COM_OBS, f"{obtype}_{datetime_to_YMDH(current_cycle)}.json")
Expand All @@ -51,8 +61,31 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
cmd()

# Check if the converter was successful
if os.path.exists(json_output_file):
rm_p(json_output_file)
# if os.path.exists(json_output_file):
# rm_p(json_output_file)

# Specify observation types to be processed by the bufr2ioda executable
BUFR_yaml_files = glob.glob(os.path.join(config_template_dir, '*.yaml'))
BUFR_yaml_files = [os.path.basename(f) for f in BUFR_yaml_files]
BUFR_yaml = [f.replace('bufr2ioda_', '').replace('.yaml', '') for f in BUFR_yaml_files]

for obtype in BUFR_yaml:
logger.info(f"Convert {obtype}...")
yaml_output_file = os.path.join(COM_OBS, f"{obtype}_{datetime_to_YMDH(current_cycle)}.yaml")
filename = 'bufr2ioda_' + obtype + '.yaml'
template = os.path.join(config_template_dir, filename)
gen_bufr_yaml(config, template, yaml_output_file)

# use the bufr2ioda executable for the ob type
bufr2iodaexe = BIN_GDAS + '/bufr2ioda.x'
cmd = Executable(bufr2iodaexe)
cmd.add_default_arg(yaml_output_file)
logger.info(f"Executing {cmd}")
cmd()

# Check if the converter was successful
# if os.path.exists(yaml_output_file):
# rm_p(yaml_output_file)


if __name__ == "__main__":
Expand Down

0 comments on commit 5c305aa

Please sign in to comment.