Commit 5d3148e

making a number of log statements info to debug level

PhillipsOwen committed Nov 4, 2024
1 parent a44c7b7 commit 5d3148e

Showing 3 changed files with 78 additions and 85 deletions.
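Note on the change itself: a message logged with `logger.info` is emitted under the usual INFO threshold, while `logger.debug` output is suppressed until the logger is configured for DEBUG, so demoting chatty statements quiets routine runs without deleting the instrumentation. A minimal sketch of that gating with the standard-library `logging` module (illustrative only; the project's actual logger is built in `src.common.utilities` and its configuration is not shown on this page):

```python
import logging

# Illustrative stand-in for the project's utilities.logger
logging.basicConfig(level=logging.INFO)  # a typical run-time threshold
logger = logging.getLogger("geopoints_demo")

logger.info("still visible at the INFO threshold")  # emitted
logger.debug("demoted detail, hidden at INFO")      # suppressed

# Re-run with level=logging.DEBUG to surface the demoted messages.
```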
4 changes: 2 additions & 2 deletions src/common/generate_urls_from_times.py
@@ -510,7 +510,7 @@ def build_url_list_from_yaml_and_times(self, ensemble='nowcast')->list:
                 url = construct_url_from_yaml( config, time, self.instance_name, ensemble, self.grid_name, hurricane_yaml_year=self.hurricane_yaml_year, hurricane_yaml_source=self.hurricane_yaml_source )
                 if url not in urls:
                     urls.append(url)
-        logger.info('Constructed %s urls of ensemble %s based on the YML', urls, ensemble)
+        logger.debug('Constructed %s urls of ensemble %s based on the YML', urls, ensemble)
         return urls

 # Approach Used by ADDA
@@ -568,7 +568,7 @@ def main(args):
     config_name=args.config_name if args.config_name is not None else os.path.join(os.path.dirname(__file__), '../config', 'url_framework.yml')

     # Set up IO env
-    logger.info("Product Level Working in %s.", os.getcwd())
+    logger.debug("Product Level Working in %s.", os.getcwd())

     if args.instance_name is not None:
         logger.debug('Ignoring args.instance_name for the testing sequence')
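For orientation on this file: `generate_urls_from_times` builds lookback URL lists by offsetting the timestamp embedded in a template TDS URL (see the `ndays` handling in `geopoints_url.py` below). A rough sketch of that idea under the `/YYYY/nam/YYYYMMDDHH/...` layout quoted in the docstrings; `offset_url` and the six-hour cycle step are hypothetical illustrations, not the class's actual implementation:

```python
from datetime import datetime, timedelta

def offset_url(template_url: str, cycles_back: int) -> str:
    """Hypothetical helper: step a TDS URL back by six-hour forecast cycles."""
    parts = template_url.split('/')
    stamp = datetime.strptime(parts[-6], '%Y%m%d%H')  # e.g. '2021052318'
    stamp -= timedelta(hours=6 * cycles_back)
    parts[-6] = stamp.strftime('%Y%m%d%H')
    parts[-8] = stamp.strftime('%Y')  # keep the year path segment in sync
    return '/'.join(parts)

url = 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'
print(offset_url(url, 1))  # .../2021/nam/2021052312/... one cycle earlier
```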
149 changes: 71 additions & 78 deletions src/common/geopoints_url.py
@@ -1,4 +1,4 @@
-'''
+"""
 MIT License
 Copyright (c) 2022,2023,2024 Renaissance Computing Institute
@@ -8,33 +8,29 @@
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-'''
+"""
 Time series extraction
 Authors: Jeffrey L. Tilson, Brian O. Blanton 8/2024
 """

-#!/usr/bin/env python
-# coding: utf-8
-# pylint: skip-file

 import sys
 import pandas as pd
 import numpy as np
 import time as tm
 import src.common.utilities as utilities
 import src.common.generate_urls_from_times as genurls
 from argparse import ArgumentParser

+# pylint: skip-file
+
 # create a logger
 logger = utilities.logger

 # Define some basic mappings for URL to variables names. Can override using CI variables
-var_mapper={'fort':'zeta','swan':'swan_HS'}
+var_mapper = {'fort': 'zeta', 'swan': 'swan_HS'}

-def guess_variable_name(url)->str:
+
+def guess_variable_name(url) -> str:
     """
     Simply search the given URL for occurances of ither fort or swan. Choose the variable approapriately. User may always
     override using --variable_name
@@ -44,14 +40,15 @@ def guess_variable_name(url)->str:
     Returns:
         varname: <str>. Guess is varname is zeta or swan_HS based on url nomenclature and specifications in the var_mapper dict
     """
-    varname=None
-    for key,value in var_mapper.items():
+    varname = None
+    for key, value in var_mapper.items():
         if isinstance(key, str) and key.casefold() in url.casefold():
-            varname=value
+            varname = value
             break
     return varname

-def strip_ensemble_from_url(urls)->str:
+
+def strip_ensemble_from_url(urls) -> str:
     """
     We mandate that the URLs input to this fetcher are those used to access the TDS server used in APSViz. The "ensemble" information will be in position .split('/')[-2]
     eg. 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'
@@ -64,11 +61,12 @@ def strip_ensemble_from_url(urls)->str:
     url = grab_first_url_from_urllist(urls)
     try:
         words = url.split('/')
-        ensemble=words[-2] # Usually nowcast,forecast, etc
+        ensemble = words[-2] # Usually nowcast,forecast, etc
     except IndexError as e:
         logger.exception(f'strip_ensemble_from_url Unexpected failure try next:')
     return ensemble

+
 def first_true(iterable, default=False, pred=None):
     """
     itertools recipe found in the Python 3 docs
@@ -83,7 +81,8 @@ def first_true(iterable, default=False, pred=None):
     """
     return next(filter(pred, iterable), default)

-def grab_first_url_from_urllist(urls)->str:
+
+def grab_first_url_from_urllist(urls) -> str:
     """
     eg. 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'
@@ -98,59 +97,60 @@ def grab_first_url_from_urllist(urls)->str:
     url = first_true(urls)
     return url

+
 def main(args):
-    variable_name=args.variable_name
-    url=args.url
-    lon=args.lon
-    lat=args.lat
-    nearest_neighbors=args.kmax
-    ndays=args.ndays # Look back/forward
-    logger.info('Input URL word is %s',url)
+    variable_name = args.variable_name
+    url = args.url
+    lon = args.lon
+    lat = args.lat
+    nearest_neighbors = args.kmax
+    ndays = args.ndays # Look back/forward
+
+    logger.info('Input URL word is %s', url)

     if variable_name is None:
-        variable_name=guess_variable_name(url)
+        variable_name = guess_variable_name(url)
         if variable_name is None:
             logger.error('Variable name invald or not identified')
             sys.exit(1)
-        logger.info(f' Identified variable name is {variable_name}')
+        logger.debug(f' Identified variable name is {variable_name}')

-    ensemble=strip_ensemble_from_url([url])
-    if args.ensemble is not None: # Else use the ensemble present in the input URL. Allow us to input a forecast but choose the nowcast
+    ensemble = strip_ensemble_from_url([url])
+    if args.ensemble is not None: # Else use the ensemble present in the input URL. Allow us to input a forecast but choose the nowcast
         ensemble = args.ensemble
-    logger.info(f'Input URL ensemble determined to be {ensemble}')
+    logger.debug(f'Input URL ensemble determined to be {ensemble}')

     # Try to setup proper header names for ADC/SWN and for nowcast/forecasr
-    dataproduct='Forecast'
-    if ensemble=='nowcast':
-        dataproduct='Nowcast'
+    dataproduct = 'Forecast'
+    if ensemble == 'nowcast':
+        dataproduct = 'Nowcast'
     # Now figure out data source: adcirc or swan
-    datasrc='APS'
-    if variable_name=='swan_HS':
-        datasrc='SWAN'
-    headername=f'{datasrc} {dataproduct}'
-    logger.info(f' Header name defined to be {headername}')
+    datasrc = 'APS'
+    if variable_name == 'swan_HS':
+        datasrc = 'SWAN'
+    headername = f'{datasrc} {dataproduct}'
+    logger.debug(f' Header name defined to be {headername}')

     if ndays <= 0:
-        logger.info(f'Build list of URLs to fetch: ndays lookback is {ndays}')
-        rpl = genurls.generate_urls_from_times(url=url,timein=None, timeout=None, ndays=ndays, grid_name=None, instance_name=None, config_name=None)
+        logger.debug(f'Build list of URLs to fetch: ndays lookback is {ndays}')
+        rpl = genurls.generate_urls_from_times(url=url, timein=None, timeout=None, ndays=ndays, grid_name=None, instance_name=None, config_name=None)
         new_urls = rpl.build_url_list_from_template_url_and_offset(ensemble=ensemble)
-        logger.debug('New URL list %s', new_urls)
+        logger.info('New URL list %s', new_urls)
     else:
-        new_urls=[url]
-    logger.info('Number of URL to try and process is: %s', len(new_urls))
+        new_urls = [url]
+    logger.debug('Number of URL to try and process is: %s', len(new_urls))

-    logger.debug('Lon: %s, Lat: %s', lon, lat)
+    logger.info('Lon: %s, Lat: %s', lon, lat)
     logger.debug('Selected nearest neighbors values is: %s', nearest_neighbors)

-    if len(new_urls) ==0:
+    if len(new_urls) == 0:
         logger.error('No URLs identified given the input URL: %s. Abort', url)
         sys.exit(1)

-    data_list=list()
-    exclude_list=list()
+    data_list = list()
+    exclude_list = list()

-    t0=tm.time()
+    t0 = tm.time()
     for url in new_urls:
         logger.debug('URL: %s', url)
         try:
@@ -159,73 +159,66 @@ def main(args):
             #df_product_metadata.to_csv(f'Product_meta.csv',header=args.keep_headers)
             data_list.append(df_product_data)
             exclude_list.append(df_excluded)
-        except (OSError,FileNotFoundError):
+        except (OSError, FileNotFoundError):
             logger.warning('Current URL was not found: %s. Try another...', url)
             pass
-    logger.info('Fetching Runtime was: %s seconds', tm.time()-t0)
+    logger.info('Fetching Runtime was: %s seconds', tm.time() - t0)

     #If absolutely nothing comes back return a None
     try:
-        df=pd.concat(data_list,axis=0)
-        df.columns=[headername]
-        df = (df.reset_index()
-              .drop_duplicates(subset='index', keep='last')
-              .set_index('index').sort_index())
-        df_excluded=pd.concat(exclude_list,axis=0)
+        df = pd.concat(data_list, axis=0)
+        df.columns = [headername]
+        df = (df.reset_index().drop_duplicates(subset='index', keep='last').set_index('index').sort_index())
+        df_excluded = pd.concat(exclude_list, axis=0)
         df.index = df.index.strftime('%Y-%m-%d %H:%M:%S')
-        df.index.name='time'
+        df.index.name = 'time'
         logger.debug('Dimension of final data array: %s', df.shape)
         logger.debug('Dimension of excluded URL list array: %s', df_excluded.shape)
     except ValueError:
         logger.info('No data found for the specified lon/lat air. Return None')
-        df=None
+        df = None

     # Final data outputs
     # df.to_csv('Product_data_geopoints.csv')
     # df_excluded.to_csv('Product_excluded_geopoints.csv')
-    logger.info('Finished. Runtime was: %s seconds', tm.time()-t0)
-
+    logger.info('Finished. Runtime was: %s seconds', tm.time() - t0)
     return df

+
 if __name__ == '__main__':
-    ret_val=0
+    ret_val = 0

     try:
         parser = ArgumentParser()
-        parser.add_argument('--lon', action='store', dest='lon', default=None, type=float,
-                    help='lon: longitiude value for time series extraction')
-        parser.add_argument('--lat', action='store', dest='lat', default=None, type=float,
-                    help='lat: latitude value for time series extraction')
+        parser.add_argument('--lon', action='store', dest='lon', default=None, type=float, help='lon: longitiude value for time series extraction')
+        parser.add_argument('--lat', action='store', dest='lat', default=None, type=float, help='lat: latitude value for time series extraction')
         parser.add_argument('--variable_name', action='store', dest='variable_name', default=None, type=str,
-                    help='Optional variable name of interest from the supplied url')
-        parser.add_argument('--kmax', action='store', dest='kmax', default=10, type=int,
-                    help='nearest_neighbors values when performing the Query')
+                            help='Optional variable name of interest from the supplied url')
+        parser.add_argument('--kmax', action='store', dest='kmax', default=10, type=int, help='nearest_neighbors values when performing the Query')
         parser.add_argument('--alt_urlsource', action='store', dest='alt_urlsource', default=None, type=str,
-                    help='Alternative location for the ADCIRC data - NOTE specific formatting requirements exist')
-        parser.add_argument('--url', action='store', dest='url', default=None, type=str,
-                    help='Specify FQ URL')
-        parser.add_argument('--keep_headers', action='store_true', default=True,
-                    help='Boolean: Indicates to add header names to output files')
+                            help='Alternative location for the ADCIRC data - NOTE specific formatting requirements exist')
+        parser.add_argument('--url', action='store', dest='url', default=None, type=str, help='Specify FQ URL')
+        parser.add_argument('--keep_headers', action='store_true', default=True, help='Boolean: Indicates to add header names to output files')
         parser.add_argument('--ensemble', action='store', dest='ensemble', default=None, type=str,
-                    help='Choose overriding ensemble such as nowcast. Else internal code extracts from the URL')
+                            help='Choose overriding ensemble such as nowcast. Else internal code extracts from the URL')
         parser.add_argument('--ndays', action='store', dest='ndays', default=0, type=int,
-                    help='ndays to scan: Default=0, <0 means look back. >0 means look forward')
+                            help='ndays to scan: Default=0, <0 means look back. >0 means look forward')
         args = parser.parse_args()

         # log the input args
-        logger.debug('input args: %s',args)
+        logger.debug('input args: %s', args)

         # Call the runner
         df = main(args)

         if df is not None:
-            logger.debug('Final output df:%s:%s',df.head(5),df.shape)
+            logger.debug('Final output df:%s:%s', df.head(5), df.shape)
         else:
             logger.debug('Final output df is None: No data found')

     except Exception:
         logger.exception("Exit: exception occured")
-        ret_val=1
+        ret_val = 1

     sys.exit(ret_val)
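As a usage illustration of the helpers above (restated standalone rather than imported from the module), the documented URL layout puts the ensemble in the second-to-last path component, and the variable name is guessed by a case-insensitive substring match against `var_mapper`:

```python
var_mapper = {'fort': 'zeta', 'swan': 'swan_HS'}
url = 'http://tds.renci.org/thredds/dodsC/2021/nam/2021052318/hsofs/hatteras.renci.org/hsofs-nam-bob-2021/nowcast/fort.63.nc'

# Ensemble comes from position .split('/')[-2], per the strip_ensemble_from_url docstring
print(url.split('/')[-2])  # -> 'nowcast'

# Variable guess, mirroring guess_variable_name: 'fort' matches 'fort.63.nc'
varname = next((v for k, v in var_mapper.items() if k.casefold() in url.casefold()), None)
print(varname)  # -> 'zeta'
```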

10 changes: 5 additions & 5 deletions src/common/utilities.py
@@ -257,7 +257,7 @@ def ComputeBasisRepresentation(xylist, agdict, agresults):
     agresults['final_weights']=final_weights
     agresults['final_jvals']=final_jvals
     agresults['final_status']=final_status
-    logger.debug('Compute of basis took: %s seconds', tm.time()-t0)
+    logger.info('Compute of basis took: %s seconds', tm.time()-t0)
     # Keep the list if the user needs to know after the fact
     outside_elements = np.argwhere(np.isnan(final_weights).all(axis=1)).ravel()
     agresults['outside_elements']=outside_elements
@@ -318,7 +318,7 @@ def WaterLevelSelection(t, data_list, final_weights):
             df_single[f'P{vertex}']=dataseries[vertex].values
         if df_single.count()[0] > 0 : # df.notna().sum()
             final_list.append(df_single)
-            logger.info('Inserted one chosen df_single with non nan values for index %s at count number %s', index,count)
+            logger.debug('Inserted one chosen df_single with non nan values for index %s at count number %s', index,count)
             break
     logger.debug('Do Selection water series update')
     try:
@@ -386,7 +386,7 @@ def ConstructReducedWaterLevelData_from_ds(ds, agdict, agresults, variable_name=
     #logger.info('Selecting the weighted mean time series')
     #df_final=WaterLevelReductions(t, data_list, final_weights)

-    logger.info('Selecting the greedy alg: first in list with not all nans time series')
+    logger.debug('Selecting the greedy alg: first in list with not all nans time series')
     df_final=WaterLevelSelection(t, data_list, final_weights)

     t0=tm.time()
@@ -412,9 +412,9 @@ def Combined_pipeline(url, variable_name, lon, lat, nearest_neighbors=10):
     ds = f63_to_xr(url)
     agdict=get_adcirc_grid_from_ds(ds)
     agdict=attach_element_areas(agdict)
-    logger.debug('Compute_pipeline initiation: %s seconds', tm.time()-t0)
+    logger.info('Compute_pipeline initiation: %s seconds', tm.time()-t0)

-    logger.debug('Start annual KDTree pipeline LON: %s LAT: %s', geopoints[0][0], geopoints[0][1])
+    logger.info('Start annual KDTree pipeline LON: %s LAT: %s', geopoints[0][0], geopoints[0][1])
     agdict=ComputeTree(agdict)
     agresults=ComputeQuery(geopoints, agdict, kmax=nearest_neighbors)
     agresults=ComputeBasisRepresentation(geopoints, agdict, agresults)
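The `ComputeTree`/`ComputeQuery` calls above wrap a KDTree nearest-neighbor search over the ADCIRC grid nodes, with `kmax` controlling how many neighbors feed the basis weights; their implementations sit outside this diff. A minimal sketch of such a query, assuming `scipy.spatial.cKDTree` and treating lon/lat as planar coordinates for brevity:

```python
import numpy as np
from scipy.spatial import cKDTree

# Hypothetical stand-ins for the ADCIRC grid node coordinates (lon, lat)
grid_nodes = np.array([[-75.0, 35.0], [-75.1, 35.2], [-74.9, 34.9], [-75.3, 35.1]])
tree = cKDTree(grid_nodes)  # built once, analogous to ComputeTree

# Query the kmax nearest nodes to one geopoint, analogous to ComputeQuery
kmax = 3
distances, indices = tree.query([-75.05, 35.05], k=kmax)
print(indices)  # node indices whose values feed the final interpolation weights
```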
