Have the driver script process both python scripts and YAMLs + bufr2ioda.x (#730)

This PR allows for all obtypes to be processed in two loops. We still need to figure out a way to process these in parallel (somewhat).
NOAA-EMC · Nov 15, 2023 · 5c305aa · 5c305aa
1 parent 6e6a480
commit 5c305aa
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 47 deletions.
diff --git a/parm/ioda/bufr2ioda/atms_beamwidth.txt b/parm/ioda/bufr2ioda/atms_beamwidth.txt
@@ -0,0 +1,36 @@
+# LOC = data/preproc/atms_beamwidth.dat
+# WMO Satellite ID
+224
+# Version identifier
+1
+# Sampling distance (deg)
+1.11
+# Number of channels to modify
+22 
+# Channel, beam width, new width, cutoff, nxaverage, nyaverage, QC dist
+# Note: to use FFT technique, set new width and cutoff as required and set 
+# nxaverage and nyaverage to 0. To use simple averaging set new width to 0.0
+# and use nxaverage and nyaverage (e.g. 3 3 for 3x3 averaging).
+# QC Dist gives number of points around missing value to flag as missing.
+ 1 5.2 3.33 0.4 0 0 11
+ 2 5.2 3.33 0.4 0 0 11
+ 3 2.2 3.33 0.0 0 0  5
+ 4 2.2 3.33 0.0 0 0  5
+ 5 2.2 3.33 0.0 0 0  5 
+ 6 2.2 3.33 0.0 0 0  5 
+ 7 2.2 3.33 0.0 0 0  5 
+ 8 2.2 3.33 0.0 0 0  5 
+ 9 2.2 3.33 0.0 0 0  5 
+10 2.2 3.33 0.0 0 0  5 
+11 2.2 3.33 0.0 0 0  5 
+12 2.2 3.33 0.0 0 0  5 
+13 2.2 3.33 0.0 0 0  5 
+14 2.2 3.33 0.0 0 0  5 
+15 2.2 3.33 0.0 0 0  5 
+16 2.2 3.33 0.0 0 0  5 
+17 1.1 3.33 0.0 0 0  5
+18 1.1 3.33 0.0 0 0  5
+19 1.1 3.33 0.0 0 0  5
+20 1.1 3.33 0.0 0 0  5
+21 1.1 3.33 0.0 0 0  5
+22 1.1 3.33 0.0 0 0  5
diff --git a/parm/ioda/bufr_atms.yaml → parm/ioda/bufr2ioda/bufr2ioda_atms.yaml b/parm/ioda/bufr_atms.yaml → parm/ioda/bufr2ioda/bufr2ioda_atms.yaml
@@ -1,7 +1,7 @@
 observations:
   - obs space:
       name: bufr
-      obsdatain: "$(BUFR_in)"
+      obsdatain: "{{ DMPDIR }}/{{ RUN }}.{{ PDY }}/{{ cyc }}/atmos/{{ RUN }}.t{{ cyc }}z.atms.tm00.bufr_d"
 
       exports:
         variables:
@@ -51,9 +51,9 @@ observations:
           sensorScanAngle:
             sensorScanAngle:
               fieldOfViewNumber: "*/FOVN"
-              heightOfStation: "*/HMSL"
               scanStart: -52.725
               scanStep: 1.110
+              sensor: atms
 
           sensorChannelNumber:
             query: "*/ATMSCH/CHNM"
@@ -80,10 +80,11 @@ observations:
               map:
                 _224: npp
                 _225: n20
+#               _226: n21
 
     ioda:
       backend: netcdf
-      obsdataout: "$(IODA_out)"
+      obsdataout: "{{ COM_OBS }}/{{ RUN }}.t{{ cyc }}z.atms.$(splitvar).tm00.nc"
 
       dimensions:
         - name: Channel

diff --git a/ush/ioda/bufr2ioda/gen_bufr2ioda_yaml.py b/ush/ioda/bufr2ioda/gen_bufr2ioda_yaml.py
@@ -5,52 +5,30 @@
 # and certain configuration parameters
 import argparse
 import os
-from wxflow import Template, TemplateConstants, YAMLFile
+from wxflow import Logger, parse_j2yaml, cast_strdict_as_dtypedict, save_as_yaml
+from wxflow import Template, TemplateConstants
 
+# initialize root logger
+logger = Logger('gen_bufr2ioda_yaml.py', level='INFO', colored_log=True)
 
-# list of satellite radiance BUFR files that need split by SatId
-sat_list = [
-    'atms',
-    '1bamua',
-    '1bmhs',
-    'crisf4',
-    'iasidb',
-]
 
-
-def gen_bufr_yaml(config):
-    # open the template input file
-    bufr_yaml = YAMLFile(path=config['template yaml'])
-    # determine if splits need in the output file path
-    obtype = config['obtype']
-    if obtype in sat_list:
-        # split by satellite platform
-        obtype_out = f"{obtype}_{{splits/satId}}"
-    else:
-        obtype_out = obtype
-    # construct the output IODA file path
-    output_ioda = [
-        config['run'],
-        f"t{config['cyc']:02}z",
-        obtype_out,
-        'nc',
-    ]
-    output_ioda_str = '.'.join(output_ioda)
-    output_ioda_file = os.path.join(config['output dir'], output_ioda_str)
-    # construct the template substitution dict
-    substitutions = {
-        'BUFR_in': config['input file'],
-        'IODA_out': output_ioda_file,
-    }
-    # substitue templates
-    bufr_yaml = Template.substitute_structure(bufr_yaml, TemplateConstants.DOLLAR_PARENTHESES, substitutions.get)
-    # write out BUFR converter YAML file
-    bufr_yaml.save(config['output yaml file'])
+def gen_bufr_yaml(config, template, output):
+    # read in templated YAML and do substitution
+    logger.info(f"Using {template} as input")
+    bufr_config = parse_j2yaml(template, config)
+    # the $(splitvar) placeholder is filled in separately: it becomes the literal '{splits/satId}' token
+    substitutions = {'splitvar': '{splits/satId}'}
+    bufr_config = Template.substitute_structure(bufr_config, TemplateConstants.DOLLAR_PARENTHESES, substitutions.get)
+    save_as_yaml(bufr_config, output)
+    logger.info(f"Wrote to {output}")
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-c', '--config', type=str, help='Input YAML Configuration', required=True)
+    parser.add_argument('-t', '--template', type=str, help='Input YAML template', required=True)
+    parser.add_argument('-o', '--output', type=str, help='Output YAML file', required=True)
     args = parser.parse_args()
-    config = YAMLFile(path=args.config)
-    gen_bufr_yaml(config)
+    # get the config from your environment
+    config = cast_strdict_as_dtypedict(os.environ)
+    # call the parsing function
+    gen_bufr_yaml(config, args.template, args.output)
diff --git a/ush/ioda/bufr2ioda/run_bufr2ioda.py b/ush/ioda/bufr2ioda/run_bufr2ioda.py
@@ -2,8 +2,10 @@
 import argparse
 import glob
 import os
+import shutil
 from pathlib import Path
 from gen_bufr2ioda_json import gen_bufr_json
+from gen_bufr2ioda_yaml import gen_bufr_yaml
 from wxflow import (Logger, Executable, cast_as_dtype, logit,
                     to_datetime, datetime_to_YMDH, Task, rm_p)
 
@@ -18,6 +20,7 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
     # Get gdasapp root directory
     DIR_ROOT = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../.."))
     USH_IODA = os.path.join(DIR_ROOT, "ush", "ioda", "bufr2ioda")
+    BIN_GDAS = os.path.join(DIR_ROOT, "build", "bin")
 
     # Create output directory if it doesn't exist
     os.makedirs(COM_OBS, exist_ok=True)
@@ -27,14 +30,21 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
         'RUN': RUN,
         'current_cycle': current_cycle,
         'DMPDIR': DMPDIR,
-        'COM_OBS': COM_OBS
+        'COM_OBS': COM_OBS,
+        'PDY': current_cycle.strftime('%Y%m%d'),
+        'cyc': current_cycle.strftime('%H'),
     }
 
+    # copy necessary fix files to runtime directory
+    shutil.copy(os.path.join(config_template_dir, "atms_beamwidth.txt"),
+                os.path.join(os.getcwd(), "atms_beamwidth.txt"))
+
     # Specify observation types to be processed by a script
     BUFR_py_files = glob.glob(os.path.join(USH_IODA, 'bufr2ioda_*.py'))
     BUFR_py_files = [os.path.basename(f) for f in BUFR_py_files]
     BUFR_py = [f.replace('bufr2ioda_', '').replace('.py', '') for f in BUFR_py_files]
 
+    # TODO: find a way to run these per-obtype conversion loops in parallel
     for obtype in BUFR_py:
         logger.info(f"Convert {obtype}...")
         json_output_file = os.path.join(COM_OBS, f"{obtype}_{datetime_to_YMDH(current_cycle)}.json")
@@ -51,8 +61,31 @@ def bufr2ioda(current_cycle, RUN, DMPDIR, config_template_dir, COM_OBS):
         cmd()
 
         # Check if the converter was successful
-        if os.path.exists(json_output_file):
-            rm_p(json_output_file)
+        # if os.path.exists(json_output_file):
+        #     rm_p(json_output_file)
+
+    # Specify observation types to be processed by the bufr2ioda executable
+    BUFR_yaml_files = glob.glob(os.path.join(config_template_dir, '*.yaml'))
+    BUFR_yaml_files = [os.path.basename(f) for f in BUFR_yaml_files]
+    BUFR_yaml = [f.replace('bufr2ioda_', '').replace('.yaml', '') for f in BUFR_yaml_files]
+
+    for obtype in BUFR_yaml:
+        logger.info(f"Convert {obtype}...")
+        yaml_output_file = os.path.join(COM_OBS, f"{obtype}_{datetime_to_YMDH(current_cycle)}.yaml")
+        filename = 'bufr2ioda_' + obtype + '.yaml'
+        template = os.path.join(config_template_dir, filename)
+        gen_bufr_yaml(config, template, yaml_output_file)
+
+        # use the bufr2ioda executable for the ob type
+        bufr2iodaexe = BIN_GDAS + '/bufr2ioda.x'
+        cmd = Executable(bufr2iodaexe)
+        cmd.add_default_arg(yaml_output_file)
+        logger.info(f"Executing {cmd}")
+        cmd()
+
+        # Check if the converter was successful
+        # if os.path.exists(yaml_output_file):
+        #     rm_p(yaml_output_file)
 
 
 if __name__ == "__main__":