Script to create a lat/lon text file for GenVxMask to read and create…

… a gridded mask file to use, based on radiosonde station locations in a GDAS prepBUFR file.
dtcenter · Dec 23, 2024 · 165c166 · 165c166
1 parent 60a15ff
commit 165c166
Showing 1 changed file with 135 additions and 0 deletions.
diff --git a/...model_applications/land_surface/PointStat_fcstUFS_obsGDAS_CTP_HI/create_raob_mask_file.py b/...model_applications/land_surface/PointStat_fcstUFS_obsGDAS_CTP_HI/create_raob_mask_file.py
@@ -0,0 +1,135 @@
+import os
+import numpy as np
+import pandas as pd
+import sys
+from metcalcpy.diagnostics import land_surface
+from metpy.units import units
+
+# Append the MET Python module directory to the path to import the functions
+sys.path.append(os.environ.get('MET_PYTHON_DIR'))
+from met.point_nc import nc_point_obs
+
+pd.set_option('display.max_rows', None)
+
+# Get the input PB2NC output filename as the input to this script
+pb2nc = sys.argv[1]
+
+# Get the output filename for this script
+outfile = sys.argv[2]
+
+# Make the output directory if it doesn't exist
+if not os.path.exists(os.path.dirname(outfile)):
+  os.makedirs(os.path.dirname(outfile))
+
+# Get the Pandas dataframe of the PB2NC data
+df = nc_point_obs(pb2nc).to_pandas()
+
+# Group the 11-column data by station. This will effectively create "soundings" for each site
+groups = df.groupby('sid')
+print("FOUND %04d SITES TO PROCESS" % (int(groups.ngroups)))
+
+# The first row of each group contains the metadata we want to retain
+point_data = groups.first().reset_index()[['sid','typ','vld','lat','lon','elv']]
+
+# Filter out stations to not process here
+point_data['site_digit'] = point_data['sid'].astype('str').str[0]
+point_data = point_data[point_data['site_digit'].isin(['7'])]
+point_data = point_data.drop(['site_digit'],axis=1)
+
+# Subset to only sid/lat/lon for unique sid's
+point_data = point_data[point_data['sid'].isin(point_data['sid'].unique())]
+print("TOTAL OF %04d UNIQUE SITES" % (int(len(point_data))))
+
+# Join the lat/lon as another column
+point_data['latlon'] = point_data['lat'].astype('str')+' '+point_data['lon'].astype('str')
+
+# Write out the text file that GenVxMask needs
+point_data['latlon'].astype('object').to_csv(outfile,index=False,sep=' ',header=['RAOB_SITES'],quoting=3,escapechar=' ')
+
+exit()
+
+# Array to hold the CTP values for each station
+ctp = np.array([])
+
+# Process each group, which is defined as a single site
+# Each site will have the MET 11-column data.
+for name,group in groups:
+
+  print("")
+  print("PROCESSING SITE: %s" % (name))
+
+  # First, make sure there is only one valid time
+  timegrp = group.groupby('vld')
+  if timegrp.ngroups>1:
+    print("INFO: FOUND MULTIPLE SOUNDINGS FOR THIS SITE.")
+    print("USING THE FIRST")
+    timegrp_name = [sg_name for sg_name,sg_df in timegrp]
+    prof = timegrp.get_group(timegrp_name[0])
+  else:
+    prof = group
+
+  # Filter out stations that we don't want to process
+  if name not in point_data['sid'].values.tolist():
+    continue
+
+  # In each group here, there are rows for each variable at each pressure level.
+  # Thus, we can subset based on TMP and just use the pressure data for TMP.
+  sub = prof[prof['var']=='TMP']
+
+  # Ensure the temperature subset has data
+  if len(sub)==0:
+    print("ERROR! NO TMP DATA.")
+    print("UNABLE TO COMPUTE CTP FOR SID: %s" % (name))
+    ctp = np.append(ctp,-9999.)
+    continue
+
+  # Subset out the data for this site into individual arrays
+  # and add units using MetPy
+  tmpsub = sub['obs'].astype('float').values*units('degK')
+  prssub = sub['lvl'].astype('float').values*units('hPa')
+  qcsub = sub['qc'].astype('int').values
+
+  # The pressures must exceed 300 hPa above the lowest in the sounding
+  if max(prssub.m)<= min(prssub.m+300.0):
+    print("ERROR! SOUNDING TOP PRESSURE DOES NOT EXCEED 300 hPa ABOVE THE LOWEST PRESSURE.")
+    print("UNABLE TO COMPUTE CTP FOR SID: %s" % (name))
+    ctp = np.append(ctp,-9999.)
+  else:
+    # Append the CTP value
+    thisctp = land_surface.calc_ctp(prssub,tmpsub)
+    ctp = np.append(ctp,thisctp.m)
+
+# After each station is processed, add in the missing 11-column data
+# lvl --> set to 1000.0
+# hgt --> set to 0
+# qc --> set to 'NA' for now
+# var --> set to "CTP"
+# typ --> reset from ADPUPA to ADPSFC
+point_data['obs'] = ctp
+point_data['lvl'] = [1000.0]*len(point_data)
+point_data['hgt'] = [0]*len(point_data)
+point_data['qc'] = ['NA']*len(point_data)
+point_data['var'] = ['CTP']*len(point_data)
+point_data['typ'] = ['ADPSFC']*len(point_data)
+
+# Assign proper dtypes
+met_col_dtypes = {'typ':'string',
+                  'sid':'string',
+                  'vld':'string',
+                  'lat':'float64',
+                  'lon':'float64',
+                  'elv':'float64',
+                  'var':'string',
+                  'lvl':'float64',
+                  'hgt':'float64',
+                  'qc':'string',
+                  'obs':'float64'}
+point_data = point_data.astype(met_col_dtypes)
+
+# Reorder the columns to be correct
+point_data = point_data[['typ','sid','vld','lat','lon','elv','var','lvl','hgt','qc','obs']]
+
+print(point_data)
+
+# Convert to MET object
+point_data = point_data.values.tolist()