Skip to content

Commit

Permalink
Script to create a lat/lon text file for GenVxMask to read and create…
Browse files Browse the repository at this point in the history
… a gridded mask file to use, based on radiosonde station locations in a GDAS prepBUFR file.
  • Loading branch information
DanielAdriaansen committed Dec 23, 2024
1 parent 60a15ff commit 165c166
Showing 1 changed file with 135 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import os
import numpy as np
import pandas as pd
import sys
from metcalcpy.diagnostics import land_surface
from metpy.units import units

# Append the MET Python module directory to the path to import the functions
sys.path.append(os.environ.get('MET_PYTHON_DIR'))
from met.point_nc import nc_point_obs

pd.set_option('display.max_rows', None)

# Get the input PB2NC output filename as the input to this script
pb2nc = sys.argv[1]

# Get the output filename for this script
outfile = sys.argv[2]

# Make the output directory if it doesn't exist
if not os.path.exists(os.path.dirname(outfile)):
os.makedirs(os.path.dirname(outfile))

# Get the Pandas dataframe of the PB2NC data
df = nc_point_obs(pb2nc).to_pandas()

# Group the 11-column data by station. This will effectively create "soundings" for each site
groups = df.groupby('sid')
print("FOUND %04d SITES TO PROCESS" % (int(groups.ngroups)))

# The first row of each group contains the metadata we want to retain
point_data = groups.first().reset_index()[['sid','typ','vld','lat','lon','elv']]

# Filter out stations to not process here
point_data['site_digit'] = point_data['sid'].astype('str').str[0]
point_data = point_data[point_data['site_digit'].isin(['7'])]
point_data = point_data.drop(['site_digit'],axis=1)

# Subset to only sid/lat/lon for unique sid's
point_data = point_data[point_data['sid'].isin(point_data['sid'].unique())]
print("TOTAL OF %04d UNIQUE SITES" % (int(len(point_data))))

# Join the lat/lon as another column
point_data['latlon'] = point_data['lat'].astype('str')+' '+point_data['lon'].astype('str')

# Write out the text file that GenVxMask needs
point_data['latlon'].astype('object').to_csv(outfile,index=False,sep=' ',header=['RAOB_SITES'],quoting=3,escapechar=' ')

exit()

# Array to hold the CTP values for each station
ctp = np.array([])

# Process each group, which is defined as a single site
# Each site will have the MET 11-column data.
for name,group in groups:

print("")
print("PROCESSING SITE: %s" % (name))

# First, make sure there is only one valid time
timegrp = group.groupby('vld')
if timegrp.ngroups>1:
print("INFO: FOUND MULTIPLE SOUNDINGS FOR THIS SITE.")
print("USING THE FIRST")
timegrp_name = [sg_name for sg_name,sg_df in timegrp]
prof = timegrp.get_group(timegrp_name[0])
else:
prof = group

# Filter out stations that we don't want to process
if name not in point_data['sid'].values.tolist():
continue

# In each group here, there are rows for each variable at each pressure level.
# Thus, we can subset based on TMP and just use the pressure data for TMP.
sub = prof[prof['var']=='TMP']

# Ensure the temperature subset has data
if len(sub)==0:
print("ERROR! NO TMP DATA.")
print("UNABLE TO COMPUTE CTP FOR SID: %s" % (name))
ctp = np.append(ctp,-9999.)
continue

# Subset out the data for this site into individual arrays
# and add units using MetPy
tmpsub = sub['obs'].astype('float').values*units('degK')
prssub = sub['lvl'].astype('float').values*units('hPa')
qcsub = sub['qc'].astype('int').values

# The pressures must exceed 300 hPa above the lowest in the sounding
if max(prssub.m)<= min(prssub.m+300.0):
print("ERROR! SOUNDING TOP PRESSURE DOES NOT EXCEED 300 hPa ABOVE THE LOWEST PRESSURE.")
print("UNABLE TO COMPUTE CTP FOR SID: %s" % (name))
ctp = np.append(ctp,-9999.)
else:
# Append the CTP value
thisctp = land_surface.calc_ctp(prssub,tmpsub)
ctp = np.append(ctp,thisctp.m)

# After each station is processed, add in the missing 11-column data
# lvl --> set to 1000.0
# hgt --> set to 0
# qc --> set to 'NA' for now
# var --> set to "CTP"
# typ --> reset from ADPUPA to ADPSFC
point_data['obs'] = ctp
point_data['lvl'] = [1000.0]*len(point_data)
point_data['hgt'] = [0]*len(point_data)
point_data['qc'] = ['NA']*len(point_data)
point_data['var'] = ['CTP']*len(point_data)
point_data['typ'] = ['ADPSFC']*len(point_data)

# Assign proper dtypes
met_col_dtypes = {'typ':'string',
'sid':'string',
'vld':'string',
'lat':'float64',
'lon':'float64',
'elv':'float64',
'var':'string',
'lvl':'float64',
'hgt':'float64',
'qc':'string',
'obs':'float64'}
point_data = point_data.astype(met_col_dtypes)

# Reorder the columns to be correct
point_data = point_data[['typ','sid','vld','lat','lon','elv','var','lvl','hgt','qc','obs']]

print(point_data)

# Convert to MET object
point_data = point_data.values.tolist()

0 comments on commit 165c166

Please sign in to comment.