diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d4af324..f017092 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,6 +26,8 @@ jobs:
         poetry install
     - name: Format with Ruff
       run: poetry run ruff format --check src tests
+    - name: Lint with Ruff
+      run: poetry run ruff check --output-format=github src tests
   build-ingest:
     name: Build Ingest image
     runs-on: ubuntu-latest
diff --git a/ruff.toml b/ruff.toml
index b9ae78d..02699c8 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,6 +1,17 @@
-select = ["E", "F", "UP", "I", "PTH", "PT", "B"]
-# select = ["ALL"]
+# For a list of available rules, see: https://docs.astral.sh/ruff/rules/
+select = [
+    "E",   # pycodestyle errors
+    "W",   # pycodestyle warnings
+    "F",   # pyflakes
+    "UP",  # pyupgrade
+    "I",   # isort
+    "PTH", # flake8-use-pathlib
+    "PT",  # flake8-pytest-style
+    "B",   # bugbear
+    "SIM", # flake8-simplify
+]
 
 ignore = [
-    "E501" # Supress line-too-long warnings: trust black's judgement on this one.
-]
\ No newline at end of file
+    "E501", # Suppress line-too-long warnings: trust the formatter's judgement on this one.
+    "W505", # Suppress doc-line-too-long warnings: trust the formatter's judgement on this one.
+]
diff --git a/src/vxingest/builder_common/builder.py b/src/vxingest/builder_common/builder.py
index 06ea14f..6c61f9a 100644
--- a/src/vxingest/builder_common/builder.py
+++ b/src/vxingest/builder_common/builder.py
@@ -3,10 +3,10 @@
 """
 
 import logging
-import os
+from pathlib import Path
 
 
-class Builder:  # pylint: disable=too-many-arguments disable=too-many-instance-attributes
+class Builder:
     """
     Parent class for all Builders
     """
@@ -21,28 +21,28 @@ def __init__(self, load_spec, ingest_document):
         # self.do_profiling = True  # set to True to enable build_document profiling
         self.do_profiling = False
 
-    def initialize_document_map(self):  # pylint: disable=missing-function-docstring
+    def initialize_document_map(self):
         pass
 
-    def get_document_map(self):  # pylint: disable=missing-function-docstring
+    def get_document_map(self):
         pass
 
-    def handle_data(self, **kwargs):  # pylint: disable=missing-function-docstring
+    def handle_data(self, **kwargs):
         pass
 
-    def derive_id(self, **kwargs):  # pylint: disable=missing-function-docstring
+    def derive_id(self, **kwargs):
         pass
 
-    def load_data(self, doc, key, element):  # pylint: disable=missing-function-docstring
+    def load_data(self, doc, key, element):
         pass
 
-    def handle_document(self):  # pylint: disable=missing-function-docstring
+    def handle_document(self):
         pass
 
-    def build_document(self, queue_element):  # pylint: disable=missing-function-docstring
+    def build_document(self, queue_element):
         pass
 
-    def build_datafile_doc(self, file_name, data_file_id, origin_type):  # pylint: disable=missing-function-docstring
+    def build_datafile_doc(self, file_name, data_file_id, origin_type):
         pass
 
     def create_data_file_id(self, subset, file_type, origin_type, file_name):
@@ -50,9 +50,9 @@ def create_data_file_id(self, subset, file_type, origin_type, file_name):
         This method creates a metar grib_to_cb datafile id from the parameters
         """
         try:
-            base_name = os.path.basename(file_name)
+            base_name = Path(file_name).name
             an_id = f"DF:{subset}:{file_type}:{origin_type}:{base_name}"
             return an_id
-        except Exception as _e:  # pylint: disable=broad-except
+        except Exception as _e:
             logging.exception("%s create_data_file_id", self.__class__.__name__)
             return None
diff --git a/src/vxingest/builder_common/builder_utilities.py b/src/vxingest/builder_common/builder_utilities.py
index ebde948..2d5a805 100644
---
a/src/vxingest/builder_common/builder_utilities.py +++ b/src/vxingest/builder_common/builder_utilities.py @@ -21,7 +21,7 @@ def convert_to_iso(an_epoch): def initialize_data(doc): """initialize the data by just making sure the template data element has been removed. All the data elements are going to be top level elements""" - if "data" in doc.keys(): + if "data" in doc: del doc["data"] return doc @@ -55,7 +55,7 @@ def get_geo_index(fcst_valid_epoch, geo): return geo_index else: return latest_index - except Exception as _e: # pylint: disable=bare-except, disable=broad-except + except Exception as _e: logging.error("CTCBuilder.get_geo_index: Exception error: %s", str(_e)) return 0 @@ -76,6 +76,6 @@ def truncate_round(_n, decimals=0): def initialize_data_array(doc): """initialize the data by just making sure the template data element has been removed. All the data elements are going to be top level elements""" - if "data" in doc.keys(): + if "data" in doc: del doc["data"] return doc diff --git a/src/vxingest/builder_common/ingest_manager.py b/src/vxingest/builder_common/ingest_manager.py index 490e3b7..837c48c 100644 --- a/src/vxingest/builder_common/ingest_manager.py +++ b/src/vxingest/builder_common/ingest_manager.py @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) -class CommonVxIngestManager(Process): # pylint:disable=too-many-instance-attributes +class CommonVxIngestManager(Process): """ IngestManager is a Process Thread that manages an object pool of builders to ingest data from GSD grib2 files or netcdf files into documents that can be @@ -47,7 +47,6 @@ class CommonVxIngestManager(Process): # pylint:disable=too-many-instance-attrib and dies. """ - # pylint:disable=too-many-arguments def __init__( self, name, @@ -78,14 +77,14 @@ def __init__( self.logging_queue = logging_queue self.logging_configurer = logging_configurer - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) + if not Path(self.output_dir).exists(): + Path(self.output_dir).mkdir(parents=True, exist_ok=True) if not os.access(self.output_dir, os.W_OK): _re = RuntimeError("Output directory: %s is not writable!", self.output_dir) logger.exception(_re) raise _re - def process_queue_element(self, queue_element): # pylint: disable=missing-function-docstring + def process_queue_element(self, queue_element): pass def close_cb(self): @@ -104,7 +103,7 @@ def connect_cb(self): """ logger.info("data_type_manager - Connecting to couchbase") # get a reference to our cluster - # noinspection PyBroadException + try: timeout_options = ClusterTimeoutOptions( kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) @@ -125,7 +124,7 @@ def connect_cb(self): self.load_spec["cluster"] = self.cluster self.load_spec["collection"] = self.collection logger.info("Couchbase connection success") - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "*** builder_common.CommonVxIngestManager in connect_cb ***" ) @@ -148,7 +147,6 @@ def run(self): self.logging_configurer(self.logging_queue) logger.info(f"Registered new process: {self.thread_name}") - # noinspection PyBroadException try: self.cb_credentials = self.load_spec["cb_connection"] # get a connection @@ -185,7 +183,7 @@ def run(self): self.thread_name, ) break - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception("%s: *** Error in IngestManager run ***", self.thread_name) raise _e finally: @@ -201,7 +199,7 @@ def write_document_to_cb(self, queue_element, 
document_map): """ # The document_map is all built now so write all the # documents in the document_map into couchbase - # noinspection PyBroadException + try: logger.info( "process_element writing documents for queue_element :%s with threadName: %s", @@ -237,7 +235,7 @@ def write_document_to_cb(self, queue_element, document_map): "process_element - executing upsert: elapsed time: %s", str(upsert_stop_time - upsert_start_time), ) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: *** Error writing to Couchbase: in process_element writing document ***", self.thread_name, @@ -266,8 +264,8 @@ def write_document_to_files(self, file_name, document_map): else: Path(self.output_dir).mkdir(parents=True, exist_ok=True) try: - file_name = os.path.basename(file_name) + ".json" - complete_file_name = os.path.join(self.output_dir, file_name) + file_name = Path(file_name).name + ".json" + complete_file_name = Path(self.output_dir) / file_name # how many documents are we writing? Log it for alert num_documents = len(list(document_map.values())) logger.info( @@ -276,17 +274,16 @@ def write_document_to_files(self, file_name, document_map): num_documents, complete_file_name, ) - _f = open(complete_file_name, "w", encoding="utf-8") - # we need to write out a list of the values of the _document_map for cbimport - json_data = json.dumps(list(document_map.values())) - _f.write(json_data) - _f.close() - except Exception as _e1: # pylint:disable=broad-except + with Path(complete_file_name).open("w", encoding="utf-8") as _f: + # we need to write out a list of the values of the _document_map for cbimport + json_data = json.dumps(list(document_map.values())) + _f.write(json_data) + except Exception as _e1: logger.exception( "write_document_to_files - trying write: Got Exception %s", str(_e1), ) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( ": *** {self.thread_name} Error writing to files: in process_element writing document*** %s", str(_e), diff --git a/src/vxingest/builder_common/load_backup_ingest_docs.py b/src/vxingest/builder_common/load_backup_ingest_docs.py index 8f2b08d..4c1b058 100644 --- a/src/vxingest/builder_common/load_backup_ingest_docs.py +++ b/src/vxingest/builder_common/load_backup_ingest_docs.py @@ -53,7 +53,7 @@ def __init__(self): def run(self, args): "thread start" - # noinspection PyBroadException + try: credentials_file = args["credentials_file"] # check for existence of file @@ -63,29 +63,25 @@ def run(self, args): + credentials_file + " can not be found!" 
) - _f = open(credentials_file, encoding="utf-8") - yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(credentials_file).open(encoding="utf-8") as _f: + yaml_data = yaml.load(_f, yaml.SafeLoader) self.cb_credentials["host"] = yaml_data["cb_host"] self.cb_credentials["user"] = yaml_data["cb_user"] self.cb_credentials["password"] = yaml_data["cb_password"] - _f.close() + # Get JSON data as a dict f_name = args["file_name"] - # Opening JSON file - _f = open(f_name, encoding="utf-8") - # returns JSON object as - # a dictionary - list_data = json.load(_f) + with Path(f_name).open(encoding="utf-8") as _f: + list_data = json.load(_f) data = {} for elem in list_data: _id = elem["id"] del elem["id"] data[_id] = elem - _f.close() self.connect_cb() self.collection.upsert_multi(data) - except: # pylint: disable=bare-except, disable=broad-except - print(": *** %s Error in multi-upsert *** " + str(sys.exc_info())) + except Exception as e: + print(f" *** Error in multi-upsert *** {e}") finally: # close any mysql connections self.close_cb() @@ -98,7 +94,7 @@ def close_cb(self): def connect_cb(self): """Connect to database""" # get a reference to our cluster - # noinspection PyBroadException + try: options = ClusterOptions( PasswordAuthenticator( @@ -109,8 +105,8 @@ def connect_cb(self): "couchbase://" + self.cb_credentials["host"], options ) self.collection = self.cluster.bucket("mdata").default_collection() - except: # pylint: disable=bare-except, disable=broad-except - print("*** %s in connect_cb ***" + str(sys.exc_info())) + except Exception as e: + print(f"*** Error in connect_cb *** {e}") sys.exit("*** Error when connecting to mysql database: ") def main(self): diff --git a/src/vxingest/builder_common/vx_ingest.py b/src/vxingest/builder_common/vx_ingest.py index d545672..987a8ff 100644 --- a/src/vxingest/builder_common/vx_ingest.py +++ b/src/vxingest/builder_common/vx_ingest.py @@ -19,7 +19,6 @@ import sys import time from datetime import timedelta -from glob import glob from pathlib import Path # This pyproj import has to remain here in order to enforce the @@ -38,7 +37,7 @@ logger = logging.getLogger(__name__) -class CommonVxIngest: # pylint: disable=too-many-arguments disable=too-many-instance-attributes +class CommonVxIngest: """ Parent class for all VxIngest. This class is the commandline mechanism for using the builder. 
@@ -68,27 +67,25 @@ def __init__(self):
         self.ingest_document_id = None
         self.ingest_document = None
 
-    def parse_args(self, args):  # pylint: disable=missing-function-docstring
+    def parse_args(self, args):
+        """This method is intended to be overridden"""
         return args
 
-    def runit(self, args):  # pylint: disable=missing-function-docstring
+    def runit(self, args):
         pass
 
     def write_load_job_to_files(self):
         """
         write all the documents in the document_map into files in the output_dir
         """
-        # noinspection PyBroadException
         try:
             Path(self.output_dir).mkdir(parents=True, exist_ok=True)
             try:
                 file_name = self.load_job_id + ".json"
-                complete_file_name = os.path.join(self.output_dir, file_name)
-                _f = open(complete_file_name, "w", encoding="utf-8")
-                _f.write(json.dumps([self.load_spec["load_job_doc"]]))
-                _f.close()
-            except Exception as _e:  # pylint: disable=broad-except
+                complete_file_name = Path(self.output_dir) / file_name
+                with Path(complete_file_name).open("w", encoding="utf-8") as _f:
+                    _f.write(json.dumps([self.load_spec["load_job_doc"]]))
+            except Exception as _e:
                 logger.info(
                     "process_file - trying write load_job: Got Exception - %s", str(_e)
                 )
@@ -135,7 +132,7 @@ def connect_cb(self):
         """
         logger.debug("%s: data_type_manager - Connecting to couchbase")
         # get a reference to our cluster
-        # noinspection PyBroadException
+
         try:
             timeout_options = ClusterTimeoutOptions(
                 kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120)
             )
@@ -155,7 +152,7 @@ def connect_cb(self):
             # stash the credentials for the VxIngestManager - see NOTE at the top of this file.
             self.load_spec["cb_credentials"] = self.cb_credentials
             logger.info("%s: Couchbase connection success")
-        except Exception as _e:  # pylint:disable=broad-except
+        except Exception as _e:
             logger.exception(
                 "*** builder_common.CommonVxIngest Error in connect_cb *** %s", str(_e)
             )
@@ -189,51 +186,51 @@ def get_file_list(self, df_query, directory, file_pattern):
             result = self.cluster.query(df_query)
             df_elements = list(result)
             df_full_names = [element["url"] for element in df_elements]
-            if os.path.exists(directory) and os.path.isdir(directory):
+            if Path(directory).exists() and Path(directory).is_dir():
                 file_list = sorted(
-                    glob(directory + os.path.sep + file_pattern), key=os.path.getmtime
+                    Path(directory).glob(file_pattern), key=os.path.getmtime
                 )
                 for filename in file_list:
                     try:
                         # check to see if this file has already been ingested
                         # (if it is not in the df_full_names - add it)
-                        if filename not in df_full_names:
+                        if str(filename) not in df_full_names:
                             logger.debug(
                                 "%s - File %s is added because it isn't in any datafile document",
                                 self.__class__.__name__,
                                 filename,
                             )
-                            file_names.append(filename)
+                            file_names.append(str(filename))
                         else:
                             # it was already processed so check to see if the mtime of the
                             # file is greater than the mtime in the database entry, if so then add it
                             df_entry = next(
                                 element
                                 for element in df_elements
-                                if element["url"] == filename
+                                if element["url"] == str(filename)
                             )
-                            if int(os.path.getmtime(filename)) > int(df_entry["mtime"]):
+                            if int(filename.stat().st_mtime) > int(df_entry["mtime"]):
                                 logger.debug(
                                     "%s - File %s is added because file mtime %s is greater than df mtime %s",
                                     self.__class__.__name__,
                                     filename,
-                                    int(os.path.getmtime(filename)),
+                                    int(filename.stat().st_mtime),
                                     int(df_entry["mtime"]),
                                 )
-                                file_names.append(filename)
+                                file_names.append(str(filename))
                             else:
                                 logger.debug(
                                     "%s - File %s has already been processed - not adding",
                                     self.__class__.__name__,
                                     filename,
                                 )
-                    except Exception as _e:  #
pylint:disable=broad-except + except Exception as _e: # don't care, it just means it wasn't a properly formatted file per the mask continue if len(file_names) == 0: logger.info("get_file_list: No files to Process!") return file_names - except Exception as _e: # pylint: disable=bare-except, disable=broad-except + except Exception as _e: logger.error( "%s get_file_list Error: %s", self.__class__.__name__, @@ -257,8 +254,8 @@ def get_credentials(self, load_spec): + self.credentials_file + " can not be found!" ) - _f = open(self.credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(self.credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) load_spec["cb_connection"] = {} load_spec["cb_connection"]["host"] = _yaml_data["cb_host"] load_spec["cb_connection"]["user"] = _yaml_data["cb_user"] @@ -266,7 +263,6 @@ def get_credentials(self, load_spec): load_spec["cb_connection"]["bucket"] = _yaml_data["cb_bucket"] load_spec["cb_connection"]["collection"] = _yaml_data["cb_collection"] load_spec["cb_connection"]["scope"] = _yaml_data["cb_scope"] - _f.close() return load_spec["cb_connection"] except (RuntimeError, TypeError, NameError, KeyError) as e: logger.error(f"*** Error reading credential file: {e} ***") diff --git a/src/vxingest/ctc_to_cb/ctc_builder.py b/src/vxingest/ctc_to_cb/ctc_builder.py index c85c1e1..b6b93f9 100644 --- a/src/vxingest/ctc_to_cb/ctc_builder.py +++ b/src/vxingest/ctc_to_cb/ctc_builder.py @@ -12,6 +12,7 @@ import logging import re import time +from pathlib import Path from pstats import Stats from couchbase.exceptions import DocumentNotFoundException, TimeoutException @@ -27,7 +28,7 @@ logger = logging.getLogger(__name__) -class CTCBuilder(Builder): # pylint:disable=too-many-instance-attributes +class CTCBuilder(Builder): """ Parent class for CTC builders 1) find all the stations for the region for this ingest (model and region) @@ -145,7 +146,7 @@ def derive_id(self, **kwargs): new_parts.append(value) new_id = ":".join(new_parts) return new_id - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception("CTCBuilder.derive_id") return None @@ -175,7 +176,7 @@ def translate_template_item(self, variable): else: value = variable.replace("*" + _ri, str(value)) return value - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "CtcBuilder.translate_template_item: Exception error: %s", str(_e) ) @@ -187,7 +188,7 @@ def handle_document(self): the self.modelData and self.obsData :return: The modified document_map """ - # noinspection PyBroadException + try: new_document = copy.deepcopy(self.template) if self.domain_stations is None: @@ -198,9 +199,8 @@ def handle_document(self): # make a copy of the template, which will become the new document # once all the translations have occured new_document = initialize_data_array(new_document) - for key in self.template.keys(): + for key in self.template: if key == "data": - # pylint: disable=assignment-from-no-return new_document = self.handle_data(doc=new_document) continue new_document = self.handle_key(new_document, key) @@ -216,7 +216,7 @@ def handle_document(self): "CtcBuilder.handle_document - cannot add document with key %s", str(new_document["id"]), ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s CtcBuilder.handle_document: Exception instantiating builder: error %s", self.__class__.__name__, @@ -234,7 +234,7 @@ def 
handle_key(self, doc, key): :param _key: A key to be processed, This can be a key to a primitive, or to another dictionary, or to a named function """ - # noinspection PyBroadException + try: if key == "id": an_id = self.derive_id(template_id=self.template["id"]) @@ -244,7 +244,7 @@ def handle_key(self, doc, key): if isinstance(doc[key], dict): # process an embedded dictionary tmp_doc = copy.deepcopy(self.template[key]) - for sub_key in tmp_doc.keys(): + for sub_key in tmp_doc: tmp_doc = self.handle_key(tmp_doc, sub_key) # recursion doc[key] = tmp_doc if ( @@ -256,7 +256,7 @@ def handle_key(self, doc, key): else: doc[key] = self.translate_template_item(doc[key]) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s CtcBuilder.handle_key: Exception in builder", self.__class__.__name__, @@ -296,7 +296,7 @@ def handle_named_function(self, named_function_def): dict_params[_p[1:]] = self.translate_template_item(_p) # call the named function using getattr replace_with = getattr(self, func)(dict_params) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_named_function: %s params %s: Exception instantiating builder:", self.__class__.__name__, @@ -305,12 +305,12 @@ def handle_named_function(self, named_function_def): ) return replace_with - def handle_fcstValidEpochs(self): # pylint: disable=invalid-name + def handle_fcstValidEpochs(self): """iterate through all the fcstValidEpochs for which we have both model data and observation data. For each entry in the data section, i.e for each station build a data element that has model and observation data, then handle the document. """ - try: # pylint: disable=too-many-nested-blocks + try: _obs_data = {} for fve in self.model_fcst_valid_epochs: try: @@ -323,7 +323,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.region, fve["fcstValidEpoch"] ) self.domain_stations = full_station_name_list - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder build_document: error: %s", self.__class__.__name__, @@ -352,7 +352,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.__class__.__name__, fve["id"], ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s Error getting model document: %s", self.__class__.__name__, @@ -381,7 +381,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name obs_id, ) continue - for key in _obs_data["data"].keys(): + for key in _obs_data["data"]: self.obs_data[key] = _obs_data["data"][key] self.obs_station_names.append(key) self.obs_station_names.sort() @@ -392,14 +392,14 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.__class__.__name__, fve["id"], ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.exception( "%s problem getting obs document: %s", self.__class__.__name__, str(_e), ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s handle_fcstValidEpochs: Exception instantiating builder: error: %s", self.__class__.__name__, @@ -426,7 +426,7 @@ def build_document(self, queue_element): fcstValidEpoch and fcstLen. This will result in a document for each fcstLen within a fcstValidEpoch. 
5) and 6) are enclosed in the handle_document() """ - # noinspection PyBroadException + try: # reset the builders document_map for a new file self.initialize_document_map() @@ -573,12 +573,14 @@ def build_document(self, queue_element): self.model_fcst_valid_epochs.append(fve) # if we have asked for profiling go ahead and do it - # pylint: disable=no-member + if self.do_profiling: with cProfile.Profile() as _pr: # process the fcstValidEpochs with profiling self.handle_fcstValidEpochs() - with open("profiling_stats.txt", "w", encoding="utf-8") as stream: + with Path("profiling_stats.txt").open( + "w", encoding="utf-8" + ) as stream: stats = Stats(_pr, stream=stream) stats.strip_dirs() stats.sort_stats("time") @@ -587,13 +589,13 @@ def build_document(self, queue_element): else: # process the fcstValidEpochs without profiling self.handle_fcstValidEpochs() - # pylint: disable=assignment-from-no-return + logger.info( "There were %s stations not found", self.not_found_station_count ) document_map = self.get_document_map() return document_map - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder build_document: error: %s for element %s", self.__class__.__name__, @@ -602,7 +604,7 @@ def build_document(self, queue_element): ) return {} - def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # pylint: disable=unused-argument + def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # NOTE: this is currently broken because we have to modify this query to # work woth the data model that has data elements as a MAP indexed by station name """Using a geosearh return all the stations within the defined region @@ -639,7 +641,7 @@ def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # pyl _domain_stations.append(elem.fields["name"]) _domain_stations.sort() return _domain_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -661,7 +663,7 @@ def get_legacy_stations_for_region(self, region_name): classic_stations = doc.content_as[dict]["stations"] classic_stations.sort() return classic_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -732,7 +734,7 @@ def get_stations_for_region_by_sort(self, region_name, valid_epoch): continue _domain_stations.sort() return _domain_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -797,13 +799,13 @@ def get_document_map(self): return self.document_map # named functions - def handle_data(self, **kwargs): # pylint:disable=too-many-branches + def handle_data(self, **kwargs): """ This routine processes the ctc data element. The data elements are all the same and always have the same keys which are thresholds, therefore this class does not implement handlers. 
:return: The modified document_map """ - try: # pylint: disable=too-many-nested-blocks + try: doc = kwargs["doc"] data_elem = {} # get the thresholds @@ -826,7 +828,7 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches false_alarms = 0 correct_negatives = 0 none_count = 0 - for key in self.model_data["data"].keys(): + for key in self.model_data["data"]: try: model_station_name = key model_station = self.model_data["data"][key] @@ -886,7 +888,7 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches < threshold ): correct_negatives = correct_negatives + 1 - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.exception("unexpected exception:%s", str(_e)) data_elem[threshold] = ( data_elem[threshold] if threshold in data_elem else {} @@ -898,7 +900,7 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches data_elem[threshold]["none_count"] = none_count doc["data"] = data_elem return doc - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception : error: %s", self.__class__.__name__, @@ -906,7 +908,7 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches ) return doc - def handle_time(self, params_dict): # pylint: disable=unused-argument + def handle_time(self, params_dict): """return the fcstValidTime for the current model in epoch Args: params_dict (dict): contains named_function parameters @@ -915,7 +917,7 @@ def handle_time(self, params_dict): # pylint: disable=unused-argument """ return self.model_data["fcstValidEpoch"] - def handle_iso_time(self, params_dict): # pylint: disable=unused-argument + def handle_iso_time(self, params_dict): """return the fcstValidTime for the current model in ISO Args: params_dict (dict): contains named_function parameters @@ -926,7 +928,7 @@ def handle_iso_time(self, params_dict): # pylint: disable=unused-argument self.model_data["fcstValidEpoch"] ).isoformat() - def handle_fcst_len(self, params_dict): # pylint: disable=unused-argument + def handle_fcst_len(self, params_dict): """returns the fcst lead time in hours for this document Args: params_dict (dict): contains named_function parameters diff --git a/src/vxingest/ctc_to_cb/run_ingest_threads.py b/src/vxingest/ctc_to_cb/run_ingest_threads.py index 5417143..b748282 100644 --- a/src/vxingest/ctc_to_cb/run_ingest_threads.py +++ b/src/vxingest/ctc_to_cb/run_ingest_threads.py @@ -163,7 +163,7 @@ def __init__(self): self.ingest_document = None super().__init__() - def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): # pylint:disable=too-many-locals + def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): """ This is the entry point for run_ingest_threads.py @@ -235,7 +235,6 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ) logger.info(f"Starting {self.thread_count} processes") for thread_count in range(int(self.thread_count)): - # noinspection PyBroadException try: ingest_manager_thread = VxIngestManager( f"VxIngestManager-{thread_count+1}", # Processes are 1 indexed in the logger @@ -248,7 +247,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ingest_manager_list.append(ingest_manager_thread) ingest_manager_thread.start() # This calls a .run() method in the class logger.info(f"Started thread: VxIngestManager-{thread_count+1}") - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: 
logger.error("*** Error in VXIngest %s***", str(_e)) raise _e # be sure to join all the threads to wait on them @@ -285,7 +284,7 @@ def main(self): # Tell the logging thread to finish up, too log_queue_listener.stop() sys.exit(0) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.info("*** FINISHED with exception %s***", str(_e)) # Tell the logging thread to finish up, too log_queue_listener.stop() diff --git a/src/vxingest/ctc_to_cb/vx_ingest_manager.py b/src/vxingest/ctc_to_cb/vx_ingest_manager.py index 673ccab..f93c7db 100644 --- a/src/vxingest/ctc_to_cb/vx_ingest_manager.py +++ b/src/vxingest/ctc_to_cb/vx_ingest_manager.py @@ -42,7 +42,7 @@ logger = logging.getLogger(__name__) -class VxIngestManager(CommonVxIngestManager): # pylint:disable=too-many-instance-attributes +class VxIngestManager(CommonVxIngestManager): """ IngestManager is a Process Thread that manages an object pool of builders to ingest data from GSD couchbase documents, producing new documents @@ -117,7 +117,7 @@ def set_builder_name(self, queue_element): self.ingest_type_builder_name = self.load_spec["ingest_documents"][ queue_element ]["builder_type"] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: process_element: Exception getting ingest document for %s", self.thread_name, @@ -133,15 +133,15 @@ def process_queue_element(self, queue_element): _e: exception """ # get or instantiate the builder - # noinspection PyBroadException + start_process_time = int(time.time()) document_map = {} - # noinspection PyBroadException + try: logger.info("process_element - : start time: %s", str(start_process_time)) try: self.set_builder_name(queue_element) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: *** Error in IngestManager run getting builder name ***", self.thread_name, @@ -165,7 +165,7 @@ def process_queue_element(self, queue_element): else: logger.info("writing document map for %s to database", queue_element) self.write_document_to_cb(queue_element, document_map) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception in builder: %s", self.thread_name, diff --git a/src/vxingest/grib2_to_cb/grib_builder.py b/src/vxingest/grib2_to_cb/grib_builder.py index 36698f0..11c1f2e 100644 --- a/src/vxingest/grib2_to_cb/grib_builder.py +++ b/src/vxingest/grib2_to_cb/grib_builder.py @@ -1,4 +1,3 @@ -# pylint: disable="too-many-lines" """ Program Name: Class grib_builder.py Contact(s): Randy Pierce @@ -10,8 +9,8 @@ import datetime as dt import logging import math -import os import sys +from pathlib import Path import numpy as np from vxingest.builder_common.builder_utilities import get_geo_index @@ -22,7 +21,7 @@ # Concrete builders -class GribModelBuilderV01(GribBuilder): # pylint:disable=too-many-instance-attributes +class GribModelBuilderV01(GribBuilder): """ This is the builder for model data that is ingested from grib2 files. It is a concrete builder specifically for the model data. @@ -33,7 +32,7 @@ def __init__( load_spec, ingest_document, number_stations=sys.maxsize, - ): # pylint:disable=too-many-arguments + ): """This builder creates a set of V01 model documents using the stations in the station list. This builder loads domain qualified station data into memory, and uses the domain_station list to associate a station with a grid value at an x_lat, x_lon point. 
@@ -72,7 +71,7 @@ def build_datafile_doc(self, file_name, data_file_id, origin_type): and imported with the other data documents. The VxIngest will query the existing dataFile documents to determine if a specific file has already been ingested. """ - mtime = os.path.getmtime(file_name) + mtime = Path(file_name).stat().st_mtime df_doc = { "id": data_file_id, "mtime": mtime, @@ -116,7 +115,7 @@ def load_data(self, doc, key, element): Returns: doc (Object): The document being created """ - if "data" not in doc.keys() or doc["data"] is None: + if "data" not in doc or doc["data"] is None: keys = list(element.keys()) doc["data"] = {} for i in range(len(self.domain_stations)): @@ -130,8 +129,7 @@ def load_data(self, doc, key, element): return doc # named functions - # pylint: disable=no-self-use - def handle_ceiling(self, params_dict): # pylint: disable=unused-argument, disable=too-many-branches + def handle_ceiling(self, params_dict): """ returns the ceiling values for all the stations in a list the dict_params aren't used here since the calculations are all done here @@ -173,10 +171,9 @@ def handle_ceiling(self, params_dict): # pylint: disable=unused-argument, disab "Cloud ceiling" ].values ceil_msl_values = [] - # print('fcst_valid_epoch',self.ds_translate_item_variables_map["fcst_valid_epoch"]) - for station in ( - self.domain_stations - ): # get the initial surface values and ceil_msl values for each station + + # get the initial surface values and ceil_msl values for each station + for station in self.domain_stations: geo_index = get_geo_index( self.ds_translate_item_variables_map["fcst_valid_epoch"], station["geo"], @@ -191,35 +188,25 @@ def handle_ceiling(self, params_dict): # pylint: disable=unused-argument, disab ceil_msl_values.append(60000) else: ceil_msl_values.append(ceil_var_values[y_gridpoint, x_gridpoint]) + ceil_agl = [] - i = 0 - for ( - station - ) in self.domain_stations: # determine the ceil_agl values for each station - if ceil_msl_values[i] == 60000: + # determine the ceil_agl values for each station + for i, _station in enumerate(self.domain_stations): + ceil_msl = ceil_msl_values[i] + surface = surface_values[i] + + if ceil_msl == 60000 or ceil_msl < -1000 or ceil_msl > 1e10: ceil_agl.append(60000) + elif ceil_msl is None or surface is None: + ceil_agl.append(None) + # handle weird '-1's in the grib files??? (from legacy code) + elif ceil_msl < 0: + ceil_agl.append(0) else: - if ceil_msl_values[i] is None or surface_values[i] is None: - ceil_agl.append(None) - else: - if ceil_msl_values[i] < -1000 or ceil_msl_values[i] > 1e10: - ceil_agl.append(60000) - else: - if ceil_msl_values[i] < 0: - # weird '-1's in the grib files??? 
(from legacy code) - ceil_agl.append(0) - else: - tmp_ceil = ( - ceil_msl_values[i] - surface_values[i] - ) * 3.281 - if tmp_ceil < 0: - ceil_agl.append(0) - else: - ceil_agl.append(tmp_ceil) - # print (station["geo"][0]['x_gridpoint'],station["geo"][0]['y_gridpoint'],round(ceil_msl_values[i],3), round(surface_values[i],3), round(ceil_agl[i],3)) - i = i + 1 + tmp_ceil = (ceil_msl - surface) * 3.281 # m -> ft + ceil_agl.append(0 if tmp_ceil < 0 else tmp_ceil) return ceil_agl - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_ceiling: Exception error: %s", self.__class__.__name__, @@ -254,7 +241,7 @@ def handle_visibility(self, params_dict): # relative humidity - convert to float - def handle_RH(self, params_dict): # pylint:disable=invalid-name + def handle_RH(self, params_dict): """translate relative humidity variable Args: params_dict (dict): named function parameters @@ -286,7 +273,7 @@ def kelvin_to_farenheight(self, params_dict): # WIND SPEED - def handle_wind_speed(self, params_dict): # pylint:disable=unused-argument + def handle_wind_speed(self, params_dict): """The params_dict aren't used here since we need to select two messages (self.grbs.select is expensive since it scans the whole grib file). Each message is selected once and the station location data saved in an array, @@ -330,15 +317,13 @@ def handle_wind_speed(self, params_dict): # pylint:disable=unused-argument ws_mph = [] for _i, uwind_ms in enumerate(uwind_ms_values): vwind_ms = vwind_ms_values[_i] - ws_ms = math.sqrt( # pylint:disable=c-extension-no-member - (uwind_ms * uwind_ms) + (vwind_ms * vwind_ms) - ) # pylint:disable=c-extension-no-member + ws_ms = math.sqrt((uwind_ms * uwind_ms) + (vwind_ms * vwind_ms)) ws_mph.append((float)((ws_ms / 0.447) + 0.5)) return ws_mph # wind direction - def handle_wind_direction(self, params_dict): # pylint:disable=unused-argument, disable=too-many-locals + def handle_wind_direction(self, params_dict): """The params_dict aren't used here since we need to select two messages (self.grbs.select is expensive since it scans the whole grib file). 
Each message is selected once and the station location data saved in an array, @@ -394,9 +379,7 @@ def handle_wind_direction(self, params_dict): # pylint:disable=unused-argument, theta = self.get_wind_theta( proj_params, lad_in_degrees, lov_in_degrees, longitude ) - radians = math.atan2( # pylint:disable=c-extension-no-member - u_val, v_val - ) # pylint:disable=c-extension-no-member + radians = math.atan2(u_val, v_val) wd_value = (radians * 57.2958) + theta + 180 # adjust for outliers if wd_value < 0: @@ -406,7 +389,7 @@ def handle_wind_direction(self, params_dict): # pylint:disable=unused-argument, _wd.append((float)(wd_value)) return _wd - def handle_wind_dir_u(self, params_dict): # pylint: disable=unused-argument + def handle_wind_dir_u(self, params_dict): """returns the wind direction U component for this document Args: params_dict (dict): contains named_function parameters but is unused here @@ -429,7 +412,7 @@ def handle_wind_dir_u(self, params_dict): # pylint: disable=unused-argument ) return uwind_ms - def handle_wind_dir_v(self, params_dict): # pylint: disable=unused-argument + def handle_wind_dir_v(self, params_dict): """returns the wind direction V component for this document Args: params_dict (dict): contains named_function parameters but is unused here @@ -451,7 +434,7 @@ def handle_wind_dir_v(self, params_dict): # pylint: disable=unused-argument ) return vwind_ms - def handle_specific_humidity(self, params_dict): # pylint: disable=unused-argument + def handle_specific_humidity(self, params_dict): """returns the specific humidity for this document Specific humidity:kg kg**-1 (instant):lambert:heightAboveGround:level 2 m Args: @@ -472,7 +455,7 @@ def handle_specific_humidity(self, params_dict): # pylint: disable=unused-argum spfh.append((float)(self.interp_grid_box(values, y_gridpoint, x_gridpoint))) return spfh - def handle_vegetation_type(self, params_dict): # pylint:disable=unused-argument + def handle_vegetation_type(self, params_dict): """returns the vegetation type for this document Args: params_dict (dict): contains named_function parameters but is unused here @@ -492,7 +475,7 @@ def handle_vegetation_type(self, params_dict): # pylint:disable=unused-argument ) return vegetation_type - def getName(self, params_dict): # pylint:disable=unused-argument,disable=invalid-name + def getName(self, params_dict): """translate the station name Args: params_dict (object): named function parameters - unused here @@ -504,7 +487,7 @@ def getName(self, params_dict): # pylint:disable=unused-argument,disable=invali station_names.append(station["name"]) return station_names - def handle_time(self, params_dict): # pylint: disable=unused-argument + def handle_time(self, params_dict): """return the time variable as an epoch Args: params_dict (object): named function parameters @@ -513,7 +496,7 @@ def handle_time(self, params_dict): # pylint: disable=unused-argument """ return (int)(self.ds_translate_item_variables_map["fcst_valid_epoch"]) - def handle_iso_time(self, params_dict): # pylint: disable=unused-argument + def handle_iso_time(self, params_dict): """return the time variable as an iso Args: params_dict (object): named function parameters @@ -524,7 +507,7 @@ def handle_iso_time(self, params_dict): # pylint: disable=unused-argument (int)(self.ds_translate_item_variables_map["fcst_valid_epoch"]) ).isoformat() - def handle_fcst_len(self, params_dict): # pylint: disable=unused-argument + def handle_fcst_len(self, params_dict): """return the fcst length variable as an int Args: params_dict 
(object): named function parameters diff --git a/src/vxingest/grib2_to_cb/grib_builder_parent.py b/src/vxingest/grib2_to_cb/grib_builder_parent.py index a3c5a1a..2c2b2a2 100644 --- a/src/vxingest/grib2_to_cb/grib_builder_parent.py +++ b/src/vxingest/grib2_to_cb/grib_builder_parent.py @@ -8,11 +8,10 @@ import copy import cProfile -import glob import logging import math -import os import sys +from pathlib import Path from pstats import Stats import pyproj @@ -28,7 +27,7 @@ logger = logging.getLogger(__name__) -class GribBuilder(Builder): # pylint: disable=too-many-arguments +class GribBuilder(Builder): """parent class for grib builders. This class contains methods that are common to all the grib builders. The entry point for every builder is the build_document(self, queue_element) which is common to all grib2 builders and is in this class.""" @@ -81,7 +80,6 @@ def get_grid( Returns: projection: projection object """ - # do not know how to disable pylint bad-option-value - probably a python2 - python3 problem init_projection = pyproj.Proj(proj_params) latlon_proj = pyproj.Proj(proj="latlon") lat_0 = latitude_of_first_grid_point_in_degrees @@ -90,7 +88,7 @@ def get_grid( init_transformer = pyproj.Transformer.from_proj( proj_from=latlon_proj, proj_to=init_projection ) - _x, _y = init_transformer.transform( # pylint: disable=unpacking-non-sequence + _x, _y = init_transformer.transform( lon_0, lat_0, radians=False ) # the lower left coordinates in the projection space @@ -154,9 +152,7 @@ def interp_grid_box(self, values, _y, _x): ) return interpolated_value except Exception as _e: - raise Exception( # pylint: disable=broad-exception-raised - f"Error in get_grid.interpGridBox - {str(_e)}" - ) from _e + raise Exception(f"Error in get_grid.interpGridBox - {str(_e)}") from _e def derive_id(self, **kwargs): """ @@ -179,14 +175,14 @@ def derive_id(self, **kwargs): value = str(self.handle_named_function(part)) else: if part.startswith("*"): - _v, _interp_v = self.translate_template_item(part) # pylint:disable=unused-variable + _v, _interp_v = self.translate_template_item(part) value = str(_v) else: value = str(part) new_parts.append(value) new_id = ":".join(new_parts) return new_id - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception("GribBuilder.derive_id") return None @@ -265,7 +261,7 @@ def translate_template_item(self, variable, single_return=False): (station_value, interpolated_value) for i in range(len(self.domain_stations)) ] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "Builder.translate_template_item for variable %s: replacements: %s", str(variable), @@ -301,7 +297,7 @@ def handle_document(self): # make a copy of the template, which will become the new document # once all the translations have occured new_document = initialize_data_array(new_document) - for key in self.template.keys(): + for key in self.template: if key == "data": new_document = self.handle_data(doc=new_document) continue @@ -318,7 +314,7 @@ def handle_document(self): "GribBuilder.handle_document - cannot add document with key %s", str(new_document["id"]), ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s GribBuilder.handle_document: Exception instantiating builder: %s", self.__class__.__name__, @@ -336,7 +332,7 @@ def handle_key(self, doc, key): :param _key: A key to be processed, This can be a key to a primitive, or to another dictionary, or to a named function """ - # 
noinspection PyBroadException + try: if key == "id": an_id = self.derive_id(template_id=self.template["id"]) @@ -346,7 +342,7 @@ def handle_key(self, doc, key): if isinstance(doc[key], dict): # process an embedded dictionary tmp_doc = copy.deepcopy(self.template[key]) - for sub_key in tmp_doc.keys(): + for sub_key in tmp_doc: tmp_doc = self.handle_key(tmp_doc, sub_key) # recursion doc[key] = tmp_doc if ( @@ -358,7 +354,7 @@ def handle_key(self, doc, key): else: doc[key], _interp_v = self.translate_template_item(doc[key], True) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s GribBuilder.handle_key: Exception in builder:", self.__class__.__name__, @@ -397,7 +393,7 @@ def handle_named_function(self, named_function_def): dict_params[_p[1:]] = self.translate_template_item(_p) # call the named function using getattr replace_with = getattr(self, func)(dict_params) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_named_function: %s params %s: Exception instantiating builder:", self.__class__.__name__, @@ -420,7 +416,7 @@ def handle_data(self, **kwargs): data_elem = {} data_key = next(iter(self.template["data"])) data_template = self.template["data"][data_key] - for key in data_template.keys(): + for key in data_template: try: value = data_template[key] # values can be null... @@ -428,7 +424,7 @@ def handle_data(self, **kwargs): value = self.handle_named_function(value) else: value = self.translate_template_item(value) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: value = [(None, None)] logger.warning( "%s Builder.handle_data - value is (None,None)", @@ -447,7 +443,7 @@ def handle_data(self, **kwargs): ) self.load_data(doc, data_key, data_elem) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_data: Exception instantiating builder", self.__class__.__name__, @@ -458,21 +454,24 @@ def delete_idx_file(self, queue_element): """ cfgrib leaves .idx files in the directory - delete the .idx file """ - idx_pattern = queue_element.replace(".grib2", "") + ".*.idx" - file_list = glob.glob(idx_pattern) + queue_element = Path(queue_element) + basepath = queue_element.parent + idx_pattern = queue_element.name.replace(".grib2", "") + ".*.idx" + file_list = basepath.glob(idx_pattern) + # Iterate over the list of filepaths & remove each file. - for file_path in file_list: + for file in file_list: try: - os.remove(file_path) + file.unlink() except OSError as _e: logger.warning( "%s Builder.build_document Error - cannot delete idx file %s - %s", self.__class__.__name__, - file_path, + file, _e, ) - def build_document(self, queue_element): # pylint:disable=too-many-statements, disable=too-many-locals + def build_document(self, queue_element): """ This is the entry point for the gribBuilders from the ingestManager. The ingest manager is giving us a grib file to process from the queue. 
@@ -770,7 +769,6 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, lon = row["geo"][geo_index]["lon"] if lat == -90 and lon == 180: continue # don't know how to transform that station - # pylint: disable=unpacking-non-sequence ( _x, _y, @@ -780,7 +778,6 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, # use for debugging if you must # print (f"transform - lat: {lat}, lon: {lon}, x_gridpoint: {x_gridpoint}, y_gridpoint: {y_gridpoint}") try: - # pylint: disable=c-extension-no-member if ( math.floor(x_gridpoint) < 0 or math.ceil(x_gridpoint) >= max_x @@ -788,7 +785,7 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, or math.ceil(y_gridpoint) >= max_y ): continue - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder build_document processing station: error: %s", self.__class__.__name__, @@ -809,7 +806,9 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, if self.do_profiling: with cProfile.Profile() as _pr: self.handle_document() - with open("profiling_stats.txt", "w", encoding="utf-8") as stream: + with Path("profiling_stats.txt").open( + "w", encoding="utf-8" + ) as stream: stats = Stats(_pr, stream=stream) stats.strip_dirs() stats.sort_stats("time") @@ -817,7 +816,7 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, stats.print_stats() else: self.handle_document() - # pylint: disable=assignment-from-no-return + document_map = self.get_document_map() data_file_id = self.create_data_file_id( self.subset, "grib2", self.template["model"], queue_element @@ -834,7 +833,7 @@ def build_document(self, queue_element): # pylint:disable=too-many-statements, document_map[data_file_doc["id"]] = data_file_doc self.delete_idx_file(queue_element) return document_map - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception with builder build_document: file_name: %s, exception %s", self.__class__.__name__, diff --git a/src/vxingest/grib2_to_cb/run_ingest_threads.py b/src/vxingest/grib2_to_cb/run_ingest_threads.py index e02a65f..67c7dd4 100644 --- a/src/vxingest/grib2_to_cb/run_ingest_threads.py +++ b/src/vxingest/grib2_to_cb/run_ingest_threads.py @@ -164,7 +164,7 @@ def __init__(self): self.ingest_document = None super().__init__() - def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): # pylint:disable=too-many-locals + def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): """ This is the entry point for run_ingest_threads.py There is a file_pattern and a file_mask. The file_mask is a python time.strftime format e.g. '%y%j%H%f'. 
@@ -175,7 +175,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) self.thread_count = args["threads"] self.output_dir = args["output_dir"].strip() self.job_document_id = args["job_id"].strip() - if "file_pattern" in args.keys(): + if "file_pattern" in args: self.file_pattern = args["file_pattern"].strip() _args_keys = args.keys() if "number_stations" in _args_keys: @@ -249,7 +249,6 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) # Make the Pool of ingest_managers ingest_manager_list = [] for thread_count in range(int(self.thread_count)): - # noinspection PyBroadException try: self.load_spec["fmask"] = self.fmask ingest_manager_thread = VxIngestManager( @@ -263,7 +262,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ) ingest_manager_list.append(ingest_manager_thread) ingest_manager_thread.start() - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error("*** Error in VXIngest %s***", str(_e)) # be sure to join all the threads to wait on them finished = [proc.join() for proc in ingest_manager_list] diff --git a/src/vxingest/grib2_to_cb/vx_ingest_manager.py b/src/vxingest/grib2_to_cb/vx_ingest_manager.py index 3d5d751..aa57842 100644 --- a/src/vxingest/grib2_to_cb/vx_ingest_manager.py +++ b/src/vxingest/grib2_to_cb/vx_ingest_manager.py @@ -44,7 +44,7 @@ logger = logging.getLogger(__name__) -class VxIngestManager(CommonVxIngestManager): # pylint:disable=too-many-instance-attributes +class VxIngestManager(CommonVxIngestManager): """ IngestManager is a Process Thread that manages an object pool of builders to ingest data from GSD grib2 files or netcdf files into documents that can be @@ -121,7 +121,7 @@ def set_builder_name(self, queue_element): if self.ingest_type_builder_name is None: try: self.ingest_type_builder_name = self.ingest_document["builder_type"] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: process_element: Exception getting ingest document for %s ", self.thread_name, @@ -137,22 +137,22 @@ def process_queue_element(self, queue_element): _e: exception """ # get or instantiate the builder - # noinspection PyBroadException + start_process_time = int(time.time()) document_map = {} - # noinspection PyBroadException + try: logger.info("process_element - : start time: %s", str(start_process_time)) try: self.set_builder_name(queue_element) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: *** Error in IngestManager run getting builder name ***", self.thread_name, ) sys.exit("*** Error getting builder name: ") - if self.ingest_type_builder_name in self.builder_map.keys(): + if self.ingest_type_builder_name in self.builder_map: builder = self.builder_map[self.ingest_type_builder_name] else: builder_class = getattr(my_builder, self.ingest_type_builder_name) @@ -165,7 +165,7 @@ def process_queue_element(self, queue_element): self.write_document_to_files(queue_element, document_map) else: self.write_document_to_cb(queue_element, document_map) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception in builder: %s", self.thread_name, diff --git a/src/vxingest/main.py b/src/vxingest/main.py index 2b710ba..81b5b59 100644 --- a/src/vxingest/main.py +++ b/src/vxingest/main.py @@ -296,7 +296,7 @@ def connect_cb(creds: dict[str, str]) -> Cluster: # Set the cluster to use the correct bucket 
and collection # TODO - is this needed? The couchbase docs seemed to indicate it was bucket = cluster.bucket(creds["cb_bucket"]) - collection = bucket.scope(creds["cb_scope"]).collection(creds["cb_collection"]) + bucket.scope(creds["cb_scope"]).collection(creds["cb_collection"]) return cluster diff --git a/src/vxingest/netcdf_to_cb/netcdf_builder.py b/src/vxingest/netcdf_to_cb/netcdf_builder.py index 45e7273..f7cbd21 100644 --- a/src/vxingest/netcdf_to_cb/netcdf_builder.py +++ b/src/vxingest/netcdf_to_cb/netcdf_builder.py @@ -12,10 +12,10 @@ import datetime as dt import logging import math -import os import re import time import traceback +from pathlib import Path from pstats import Stats import netCDF4 as nc @@ -33,7 +33,7 @@ logger = logging.getLogger(__name__) -class NetcdfBuilder(Builder): # pylint disable=too-many-instance-attributes +class NetcdfBuilder(Builder): """parent class for netcdf builders""" def __init__(self, load_spec, ingest_document): @@ -78,7 +78,7 @@ def derive_id(self, **kwargs): new_parts.append(value) new_id = ":".join(new_parts) return new_id - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception("NetcdfBuilder.derive_id: Exception error: %s") return None @@ -91,7 +91,7 @@ def translate_template_item(self, variable, rec_num): :return: """ replacements = [] - # noinspection PyBroadException + try: if isinstance(variable, str): replacements = variable.split("*")[1:] @@ -100,8 +100,8 @@ def translate_template_item(self, variable, rec_num): return variable make_str = False value = variable - Smatch = re.compile(".*S.*") # pylint:disable=invalid-name - Umatch = re.compile(".*U.*") # pylint:disable=invalid-name + Smatch = re.compile(".*S.*") + Umatch = re.compile(".*U.*") if len(replacements) > 0: for _ri in replacements: vtype = str(self.ncdf_data_set.variables[_ri].dtype) @@ -115,7 +115,6 @@ def translate_template_item(self, variable, rec_num): if chartostring: # for these we have to convert the character array AND convert to ISO (it is probably a string date) value = convert_to_iso( - # pylint: disable=maybe-no-member "*{ISO}" + nc.chartostring(self.ncdf_data_set[variable][rec_num]) ) @@ -130,7 +129,6 @@ def translate_template_item(self, variable, rec_num): if chartostring: # it is a char array of something value = value.replace( - # pylint: disable=maybe-no-member "*" + _ri, str( nc.chartostring( @@ -146,7 +144,7 @@ def translate_template_item(self, variable, rec_num): else: # it desn't need to be a string return self.ncdf_data_set[variable][rec_num] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "Builder.translate_template_item for variable %s: replacements: %s", str(variable), @@ -162,7 +160,7 @@ def handle_document(self): each station will get values from the record. 
:return: The modified document_map """ - # noinspection PyBroadException + try: new_document = copy.deepcopy(self.template) rec_num_data_size = self.ncdf_data_set.dimensions["recNum"].size @@ -172,7 +170,7 @@ def handle_document(self): # once all the translations have occured new_document = initialize_data_array(new_document) for rec_num in range(rec_num_data_size): - for key in self.template.keys(): + for key in self.template: if key == "data": new_document = self.handle_data( doc=new_document, rec_num=rec_num @@ -191,7 +189,7 @@ def handle_document(self): "NetcdfBuilder.handle_document - cannot add document with key %s", str(new_document["id"]), ) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "NetcdfBuilder.handle_document: Exception instantiating builder: %s error: %s", self.__class__.__name__, @@ -209,7 +207,7 @@ def handle_key(self, doc, _rec_num, key): :param _key: A key to be processed, This can be a key to a primitive, or to another dictionary, or to a named function """ - # noinspection PyBroadException + try: if key == "id": an_id = self.derive_id( @@ -221,7 +219,7 @@ def handle_key(self, doc, _rec_num, key): if isinstance(doc[key], dict): # process an embedded dictionary tmp_doc = copy.deepcopy(self.template[key]) - for sub_key in tmp_doc.keys(): + for sub_key in tmp_doc: tmp_doc = self.handle_key(tmp_doc, _rec_num, sub_key) # recursion doc[key] = tmp_doc if ( @@ -233,7 +231,7 @@ def handle_key(self, doc, _rec_num, key): else: doc[key] = self.translate_template_item(doc[key], _rec_num) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s NetcdfBuilder.handle_key: Exception in builder", self.__class__.__name__, @@ -255,7 +253,7 @@ def handle_named_function(self, named_function_def, rec_num): will be substituted into the document. :_recNum the recNum being processed. """ - # noinspection PyBroadException + func = None try: func = named_function_def.split("|")[0].replace("&", "") @@ -266,7 +264,7 @@ def handle_named_function(self, named_function_def, rec_num): dict_params[_p[1:]] = self.translate_template_item(_p, rec_num) # call the named function using getattr replace_with = getattr(self, func)(dict_params) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_named_function: %s params %s: Exception instantiating builder:", self.__class__.__name__, @@ -290,7 +288,7 @@ def handle_data(self, **kwargs): data_elem = {} data_key = next(iter(self.template["data"])) data_template = self.template["data"][data_key] - for key in data_template.keys(): + for key in data_template: try: value = data_template[key] # values can be null... 
@@ -298,7 +296,7 @@ def handle_data(self, **kwargs): value = self.handle_named_function(value, rec_num) else: value = self.translate_template_item(value, rec_num) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: value = None logger.warning( "%s Builder.handle_data - value is None", @@ -316,7 +314,7 @@ def handle_data(self, **kwargs): ) self.load_data(doc, data_key, data_elem) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_data: Exception instantiating builder", self.__class__.__name__, @@ -334,15 +332,15 @@ def build_document(self, queue_element): Returns: [dict]: document """ - # noinspection PyBroadException + try: bucket = self.load_spec["cb_connection"]["bucket"] scope = self.load_spec["cb_connection"]["scope"] collection = self.load_spec["cb_connection"]["collection"] # stash the file_name so that it can be used later - self.file_name = os.path.basename(queue_element) - # pylint: disable=no-member + self.file_name = Path(queue_element).name + self.ncdf_data_set = nc.Dataset(queue_element) if len(self.stations) == 0: stmnt = f"""SELECT {self.subset}.* @@ -363,7 +361,9 @@ def build_document(self, queue_element): if self.do_profiling: with cProfile.Profile() as _pr: self.handle_document() - with open("profiling_stats.txt", "w", encoding="utf-8") as stream: + with Path("profiling_stats.txt").open( + "w", encoding="utf-8" + ) as stream: stats = Stats(_pr, stream=stream) stats.strip_dirs() stats.sort_stats("time") @@ -371,7 +371,7 @@ def build_document(self, queue_element): stats.print_stats() else: self.handle_document() - # pylint: disable=assignment-from-no-return + document_map = self.get_document_map() data_file_id = self.create_data_file_id( self.subset, "netcdf", "madis", queue_element @@ -381,7 +381,7 @@ def build_document(self, queue_element): ) document_map[data_file_doc["id"]] = data_file_doc return document_map - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception with builder build_document: file_name: %s", self.__class__.__name__, @@ -391,7 +391,7 @@ def build_document(self, queue_element): # Concrete builders -class NetcdfMetarObsBuilderV01(NetcdfBuilder): # pylint: disable=too-many-instance-attributes +class NetcdfMetarObsBuilderV01(NetcdfBuilder): """ This is the builder for observation data that is ingested from netcdf (madis) files """ @@ -431,7 +431,7 @@ def build_datafile_doc(self, file_name, data_file_id, origin_type): The VxIngest will examine the existing dataFile documents to determine if a psecific file has already been ingested. 
""" - mtime = os.path.getmtime(file_name) + mtime = Path(file_name).stat().st_mtime df_doc = { "id": data_file_id, "mtime": mtime, @@ -462,7 +462,7 @@ def get_document_map(self): if len(self.same_time_rows) != 0: self.handle_document() return self.document_map - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s get_document_map: Exception in get_document_map: %s", self.__class__.__name__, @@ -482,9 +482,9 @@ def load_data(self, doc, key, element): :param element: the observation data :return: the document being created """ - if "data" not in doc.keys() or doc["data"] is None: + if "data" not in doc or doc["data"] is None: doc["data"] = {} - if element["name"] not in doc["data"].keys(): + if element["name"] not in doc["data"]: # we only want the closest record (to match the legacy-sql data) doc["data"][element["name"]] = element else: @@ -510,7 +510,7 @@ def meterspersecond_to_milesperhour(self, params_dict): if value is not None and value != "": value = value * 2.237 return value - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception in named function meterspersecond_to_milesperhour: error: %s", self.__class__.__name__, @@ -518,7 +518,7 @@ def meterspersecond_to_milesperhour(self, params_dict): ) return None - def ceiling_transform(self, params_dict): # pylint: disable=too-many-locals + def ceiling_transform(self, params_dict): """retrieves skyCover and skyLayerBase data and transforms it into a Ceiling value Args: params_dict (dict): named function parameters @@ -526,23 +526,19 @@ def ceiling_transform(self, params_dict): # pylint: disable=too-many-locals [type]: [description] """ try: - skyCover = params_dict["skyCover"] # pylint:disable=invalid-name - skyLayerBase = params_dict["skyLayerBase"] # pylint:disable=invalid-name + skyCover = params_dict["skyCover"] + skyLayerBase = params_dict["skyLayerBase"] # code clear as 60,000 ft - mCLR = re.compile(".*CLR.*") # pylint:disable=invalid-name - mSKC = re.compile(".*SKC.*") # pylint:disable=invalid-name - mNSC = re.compile(".*NSC.*") # pylint:disable=invalid-name - mFEW = re.compile(".*FEW.*") # pylint:disable=invalid-name - mSCT = re.compile(".*SCT.*") # pylint:disable=invalid-name - mBKN = re.compile(".*BKN.*") # Broken pylint:disable=invalid-name - mOVC = re.compile(".*OVC.*") # Overcast pylint:disable=invalid-name - mVV = re.compile( # pylint: disable=invalid-name - ".*VV.*" - ) # Vertical Visibility pylint:disable=invalid-name + mCLR = re.compile(".*CLR.*") + mSKC = re.compile(".*SKC.*") + mNSC = re.compile(".*NSC.*") + mFEW = re.compile(".*FEW.*") + mSCT = re.compile(".*SCT.*") + mBKN = re.compile(".*BKN.*") # Broken + mOVC = re.compile(".*OVC.*") # Overcast + mVV = re.compile(".*VV.*") # Vertical Visibility mask_array = ma.getmaskarray(skyLayerBase) - skyCover_array = ( # pylint:disable=invalid-name - skyCover[1:-1].replace("'", "").split(" ") - ) + skyCover_array = skyCover[1:-1].replace("'", "").split(" ") # check for unmasked ceiling values - broken, overcast, vertical visibility - return associated skyLayerBase # name = str(nc.chartostring(self.ncdf_data_set['stationName'][params_dict['recNum']])) for index, sca_val in enumerate(skyCover_array): @@ -550,9 +546,7 @@ def ceiling_transform(self, params_dict): # pylint: disable=too-many-locals if (not mask_array[index]) and ( mBKN.match(sca_val) or mOVC.match(sca_val) or mVV.match(sca_val) ): - return math.floor( # pylint: disable=c-extension-no-member - 
skyLayerBase[index] * 3.281 - ) # pylint:disable=c-extension-no-member + return math.floor(skyLayerBase[index] * 3.281) # check for unmasked ceiling values - all the others - CLR, SKC, NSC, FEW, SCT - return 60000 for index, sca_val in enumerate(skyCover_array): # 60000 is aldready feet @@ -565,7 +559,7 @@ def ceiling_transform(self, params_dict): # pylint: disable=too-many-locals ): return 60000 # nothing was unmasked - return 60000 if there is a ceiling value in skycover array - for index, sca_val in enumerate(skyCover_array): + for sca_val in skyCover_array: if ( mCLR.match(sca_val) or mSKC.match(sca_val) @@ -576,7 +570,7 @@ def ceiling_transform(self, params_dict): # pylint: disable=too-many-locals return 60000 # masked and no ceiling value in skyCover_array return None - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception in named function ceiling_transform: error: %s", self.__class__.__name__, @@ -602,7 +596,7 @@ def kelvin_to_farenheight(self, params_dict): if value is not None and value != "": value = (float(value) - 273.15) * 1.8 + 32 return value - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception in named function kelvin_to_farenheight: error: %s", self.__class__.__name__, @@ -621,7 +615,7 @@ def umask_value_transform(self, params_dict): try: key = None rec_num = params_dict["recNum"] - for key in params_dict.keys(): + for key in params_dict: if key != "recNum": break nc_value = self.ncdf_data_set[key][rec_num] @@ -630,7 +624,7 @@ def umask_value_transform(self, params_dict): return float(value) else: return None - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s umask_value_transform: Exception in named function umask_value_transform for key %s: error: %s", self.__class__.__name__, @@ -744,7 +738,7 @@ def handle_pressure(self, params_dict): # convert to millibars (from pascals) and round value = float(value) / 100 return value - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_pressure: Exception in named function: error: %s", self.__class__.__name__, @@ -765,7 +759,7 @@ def handle_visibility(self, params_dict): if value is not None: value = float(value) / 1609.344 return value - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_visibility: Exception in named function: error: %s", self.__class__.__name__, @@ -782,14 +776,14 @@ def derive_valid_time_iso(self, params_dict): # convert the file name to an epoch using the mask try: key = None - for key in params_dict.keys(): + for key in params_dict: if key != "recNum": break _file_utc_time = dt.datetime.strptime(self.file_name, params_dict[key]) epoch = (_file_utc_time - dt.datetime(1970, 1, 1)).total_seconds() iso = convert_to_iso(epoch) return iso - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s : Exception in named function derive_valid_time_iso: error: %s", self.__class__.__name__, @@ -806,13 +800,13 @@ def derive_valid_time_epoch(self, params_dict): # convert the file name to an epoch using the mask try: key = None - for key in params_dict.keys(): + for key in params_dict: if key != "recNum": break _file_utc_time = dt.datetime.strptime(self.file_name, params_dict[key]) epoch = (_file_utc_time - dt.datetime(1970, 1, 1)).total_seconds() return int(epoch) - except Exception as _e: # 
pylint:disable=broad-except + except Exception as _e: logger.error( "%s : Exception in named function derive_valid_time_epoch: error: %s", self.__class__.__name__, @@ -839,7 +833,7 @@ def interpolate_time(self, params_dict): ) + dt.timedelta(hours=_ret_time.minute // delta_minutes) return calendar.timegm(_ret_time.timetuple()) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception in named function interpolate_time: error: %s", self.__class__.__name__, @@ -859,7 +853,7 @@ def interpolate_time_iso(self, params_dict): if _time is None: return None return str(_time.isoformat()) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception in named function interpolate_time_iso: error: %s", self.__class__.__name__, @@ -890,7 +884,7 @@ def fill_from_netcdf(self, rec_num, netcdf): )[0] else: netcdf["elevation"] = None - # pylint: disable=no-member + netcdf["description"] = str( nc.chartostring(self.ncdf_data_set["locationName"][rec_num]) ) @@ -918,7 +912,7 @@ def handle_station(self, params_dict): fcst_valid_epoch = self.derive_valid_time_epoch( {"file_name_pattern": self.load_spec["fmask"]} ) - # noinspection PyBroadException + try: # get the netcdf fields for comparing or adding new netcdf = self.fill_from_netcdf(rec_num, netcdf) @@ -927,10 +921,12 @@ def handle_station(self, params_dict): lon = truncate_round(float(netcdf["longitude"]), 5) station = None station_index = None - for station_index, a_station in enumerate(self.stations): + for idx, a_station in enumerate(self.stations): if a_station["name"] == station_name: station = a_station + station_index = idx break + if station is None: # get the netcdf fields for comparing or adding new an_id = "MD:V01:METAR:station:" + netcdf["name"] @@ -998,7 +994,7 @@ def handle_station(self, params_dict): an_id = self.stations[station_index]["id"] self.document_map[an_id] = self.stations[station_index] return params_dict["stationName"] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s netcdfObsBuilderV01.handle_station: Exception finding or creating station to match station_name: params: %s", self.__class__.__name__, diff --git a/src/vxingest/netcdf_to_cb/run_ingest_threads.py b/src/vxingest/netcdf_to_cb/run_ingest_threads.py index 46d9200..79fd08c 100644 --- a/src/vxingest/netcdf_to_cb/run_ingest_threads.py +++ b/src/vxingest/netcdf_to_cb/run_ingest_threads.py @@ -155,7 +155,7 @@ def __init__(self): self.ingest_document = None super().__init__() - def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): # pylint:disable=too-many-locals + def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): """ This is the entry point for run_ingest_threads.py """ @@ -163,7 +163,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) self.thread_count = args["threads"] self.output_dir = args["output_dir"].strip() self.job_document_id = args["job_id"].strip() - if "file_pattern" in args.keys(): + if "file_pattern" in args: self.file_pattern = args["file_pattern"].strip() try: # put the real credentials into the load_spec @@ -233,7 +233,6 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) # Make the Pool of ingest_managers ingest_manager_list = [] for thread_count in range(int(self.thread_count)): - # noinspection PyBroadException try: self.load_spec["fmask"] = self.fmask 
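# --- Illustrative sketch (not from this diff): the fan-out/join pattern used in
# runit just below - one worker process per requested thread, started right away
# and then joined as a batch. _work is a stand-in for VxIngestManager.run.
from multiprocessing import Process

def _work(worker_id: int) -> None:
    print(f"VxIngestManager-{worker_id} running")

if __name__ == "__main__":
    thread_count = 2
    ingest_manager_list = []
    for idx in range(thread_count):
        proc = Process(target=_work, args=(idx + 1,))  # processes are 1 indexed in the log names
        ingest_manager_list.append(proc)
        proc.start()
    # join all the workers so the parent waits for them to finish
    finished = [proc.join() for proc in ingest_manager_list]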
ingest_manager_thread = VxIngestManager( @@ -246,7 +245,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ) ingest_manager_list.append(ingest_manager_thread) ingest_manager_thread.start() - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error("*** Error in VXIngest %s***", str(_e)) # be sure to join all the threads to wait on them finished = [proc.join() for proc in ingest_manager_list] diff --git a/src/vxingest/netcdf_to_cb/vx_ingest_manager.py b/src/vxingest/netcdf_to_cb/vx_ingest_manager.py index ff5a4d6..85c471f 100644 --- a/src/vxingest/netcdf_to_cb/vx_ingest_manager.py +++ b/src/vxingest/netcdf_to_cb/vx_ingest_manager.py @@ -44,7 +44,7 @@ logger = logging.getLogger(__name__) -class VxIngestManager(CommonVxIngestManager): # pylint:disable=too-many-instance-attributes +class VxIngestManager(CommonVxIngestManager): """ IngestManager is a Process Thread that manages an object pool of builders to ingest data from GSD grib2 files or netcdf files into documents that can be @@ -118,7 +118,7 @@ def set_builder_name(self, queue_element): if self.ingest_type_builder_name is None: try: self.ingest_type_builder_name = self.ingest_document["builder_type"] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: process_element: Exception getting ingest document for %s ", self.thread_name, @@ -134,22 +134,22 @@ def process_queue_element(self, queue_element): _e: exception """ # get or instantiate the builder - # noinspection PyBroadException + start_process_time = int(time.time()) document_map = {} - # noinspection PyBroadException + try: logger.info("process_element - : start time: %s", str(start_process_time)) try: self.set_builder_name(queue_element) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: *** Error in IngestManager run getting builder name ***", self.thread_name, ) sys.exit("*** Error getting builder name: ") - if self.ingest_type_builder_name in self.builder_map.keys(): + if self.ingest_type_builder_name in self.builder_map: builder = self.builder_map[self.ingest_type_builder_name] else: builder_class = getattr(my_builder, self.ingest_type_builder_name) @@ -160,7 +160,7 @@ def process_queue_element(self, queue_element): self.write_document_to_files(queue_element, document_map) else: self.write_document_to_cb(queue_element, document_map) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception in builder: %s", self.thread_name, diff --git a/src/vxingest/partial_sums_to_cb/partial_sums_builder.py b/src/vxingest/partial_sums_to_cb/partial_sums_builder.py index ac3f4fe..82b28f1 100644 --- a/src/vxingest/partial_sums_to_cb/partial_sums_builder.py +++ b/src/vxingest/partial_sums_to_cb/partial_sums_builder.py @@ -12,6 +12,7 @@ import logging import re import time +from pathlib import Path from pstats import Stats from couchbase.exceptions import DocumentNotFoundException, TimeoutException @@ -29,7 +30,7 @@ logger = logging.getLogger(__name__) -class PartialSumsBuilder(Builder): # pylint:disable=too-many-instance-attributes +class PartialSumsBuilder(Builder): """ Parent class for PARTIALSUMS builders 1) find all the stations for the region for this ingest (model and region) @@ -146,7 +147,7 @@ def derive_id(self, **kwargs): new_parts.append(value) new_id = ":".join(new_parts) return new_id - except Exception as _e: # pylint:disable=broad-except + except 
Exception as _e: logger.exception("PARTIALSUMSBuilder.derive_id") return None @@ -176,7 +177,7 @@ def translate_template_item(self, variable): else: value = variable.replace("*" + _ri, str(value)) return value - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "PartialSumsBuilder.translate_template_item: Exception error: %s", str(_e), @@ -189,7 +190,7 @@ def handle_document(self): the self.modelData and self.obsData :return: The modified document_map """ - # noinspection PyBroadException + try: new_document = copy.deepcopy(self.template) if self.domain_stations is None: @@ -200,9 +201,8 @@ def handle_document(self): # make a copy of the template, which will become the new document # once all the translations have occured new_document = initialize_data_array(new_document) - for key in self.template.keys(): + for key in self.template: if key == "data": - # pylint: disable=assignment-from-no-return new_document = self.handle_data(doc=new_document) continue new_document = self.handle_key(new_document, key) @@ -218,7 +218,7 @@ def handle_document(self): "PartialSumsBuilder.handle_document - cannot add document with key %s", str(new_document["id"]), ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s PartialSumsBuilder.handle_document: Exception instantiating builder: error %s", self.__class__.__name__, @@ -236,7 +236,7 @@ def handle_key(self, doc, key): :param _key: A key to be processed, This can be a key to a primitive, or to another dictionary, or to a named function """ - # noinspection PyBroadException + try: if key == "id": an_id = self.derive_id(template_id=self.template["id"]) @@ -246,7 +246,7 @@ def handle_key(self, doc, key): if isinstance(doc[key], dict): # process an embedded dictionary tmp_doc = copy.deepcopy(self.template[key]) - for sub_key in tmp_doc.keys(): + for sub_key in tmp_doc: tmp_doc = self.handle_key(tmp_doc, sub_key) # recursion doc[key] = tmp_doc if ( @@ -258,7 +258,7 @@ def handle_key(self, doc, key): else: doc[key] = self.translate_template_item(doc[key]) return doc - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s PartialSumsBuilder.handle_key: Exception in builder", self.__class__.__name__, @@ -301,7 +301,7 @@ def handle_named_function(self, named_function_def): dict_params[_p] = self.translate_template_item(_p) # call the named function using getattr replace_with = getattr(self, func)(dict_params) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s handle_named_function: %s params %s: Exception instantiating builder:", self.__class__.__name__, @@ -310,12 +310,12 @@ def handle_named_function(self, named_function_def): ) return replace_with - def handle_fcstValidEpochs(self): # pylint: disable=invalid-name + def handle_fcstValidEpochs(self): """iterate through all the fcstValidEpochs for which we have both model data and observation data. For each entry in the data section, i.e for each station build a data element that has model and observation data, then handle the document. 
""" - try: # pylint: disable=too-many-nested-blocks + try: _obs_data = {} for fve in self.model_fcst_valid_epochs: try: @@ -328,7 +328,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.region, fve["fcstValidEpoch"] ) self.domain_stations = full_station_name_list - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder build_document: error: %s", self.__class__.__name__, @@ -357,7 +357,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.__class__.__name__, fve["id"], ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s Error getting model document: %s", self.__class__.__name__, @@ -386,7 +386,7 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name obs_id, ) continue - for key in _obs_data["data"].keys(): + for key in _obs_data["data"]: self.obs_data[key] = _obs_data["data"][key] self.obs_station_names.append(key) self.obs_station_names.sort() @@ -397,14 +397,14 @@ def handle_fcstValidEpochs(self): # pylint: disable=invalid-name self.__class__.__name__, fve["id"], ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.exception( "%s problem getting obs document: %s", self.__class__.__name__, str(_e), ) - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s handle_fcstValidEpochs: Exception instantiating builder: error: %s", self.__class__.__name__, @@ -434,7 +434,7 @@ def build_document(self, queue_element): fcstValidEpoch and fcstLen. This will result in a document for each fcstLen within a fcstValidEpoch. 5) and 6) are enclosed in the handle_document() """ - # noinspection PyBroadException + try: # reset the builders document_map for a new file self.initialize_document_map() @@ -581,12 +581,14 @@ def build_document(self, queue_element): self.model_fcst_valid_epochs.append(fve) # if we have asked for profiling go ahead and do it - # pylint: disable=no-member + if self.do_profiling: with cProfile.Profile() as _pr: # process the fcstValidEpochs with profiling self.handle_fcstValidEpochs() - with open("profiling_stats.txt", "w", encoding="utf-8") as stream: + with Path("profiling_stats.txt").open( + "w", encoding="utf-8" + ) as stream: stats = Stats(_pr, stream=stream) stats.strip_dirs() stats.sort_stats("time") @@ -595,13 +597,13 @@ def build_document(self, queue_element): else: # process the fcstValidEpochs without profiling self.handle_fcstValidEpochs() - # pylint: disable=assignment-from-no-return + logger.info( "There were %s stations not found", self.not_found_station_count ) document_map = self.get_document_map() return document_map - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder build_document: error: %s for element %s", self.__class__.__name__, @@ -610,7 +612,7 @@ def build_document(self, queue_element): ) return {} - def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # pylint: disable=unused-argument + def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # NOTE: this is currently broken because we have to modify this query to # work woth the data model that has data elements as a MAP indexed by station name """Using a geosearh return all the stations within the defined region @@ -647,7 +649,7 @@ def get_stations_for_region_by_geosearch(self, region_name, valid_epoch): # pyl 
_domain_stations.append(elem.fields["name"]) _domain_stations.sort() return _domain_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -669,7 +671,7 @@ def get_legacy_stations_for_region(self, region_name): classic_stations = doc.content_as[dict]["stations"] classic_stations.sort() return classic_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -740,7 +742,7 @@ def get_stations_for_region_by_sort(self, region_name, valid_epoch): continue _domain_stations.sort() return _domain_stations - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s: Exception with builder: error: %s", self.__class__.__name__, @@ -867,17 +869,15 @@ def handle_sum(self, params_dict): ) model_elem["UW"] = wind_components_t[0].magnitude model_elem["VW"] = wind_components_t[1].magnitude - if variable in obs_elem and variable in model_elem: - if ( - obs_elem[variable] is not None - and model_elem[variable] is not None - ): - obs_vals.append(obs_elem[variable]) - model_vals.append(model_elem[variable]) - _diff = model_elem[variable] - obs_elem[variable] - diff_vals.append(_diff) - diff_vals_squared.append(_diff * _diff) - abs_diff_vals.append(abs(_diff)) + obs_var = obs_elem[variable] + model_var = model_elem[variable] + if obs_var is not None and model_var is not None: + obs_vals.append(obs_var) + model_vals.append(model_var) + _diff = model_var - obs_var + diff_vals.append(_diff) + diff_vals_squared.append(_diff * _diff) + abs_diff_vals.append(abs(_diff)) sum_elem = { "num_recs": len(obs_vals), "sum_obs": sum(obs_vals), @@ -887,7 +887,7 @@ def handle_sum(self, params_dict): "sum_abs": sum(abs_diff_vals), } return sum_elem - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error( "%s handle_sum: Exception : error: %s", self.__class__.__name__, @@ -895,7 +895,7 @@ def handle_sum(self, params_dict): ) return None - def handle_data(self, **kwargs): # pylint:disable=too-many-branches + def handle_data(self, **kwargs): """ This routine processes the partialsums data element. The data elements are variables for which we will derive a set of sums. 
The sums are @@ -919,18 +919,18 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches SurfacePressure: {...} } """ - try: # pylint: disable=too-many-nested-blocks + try: doc = kwargs["doc"] template_data = self.template["data"] data_elem = {} # it is expected that the template data section be comprised of named functions - for variable in template_data.keys(): + for variable in template_data: data_elem[variable] = self.handle_named_function( template_data[variable] ) doc["data"] = data_elem return doc - except Exception as _e: # pylint: disable=broad-except + except Exception as _e: logger.error( "%s handle_data: Exception : error: %s", self.__class__.__name__, @@ -938,7 +938,7 @@ def handle_data(self, **kwargs): # pylint:disable=too-many-branches ) return doc - def handle_time(self, params_dict): # pylint: disable=unused-argument + def handle_time(self, params_dict): """return the fcstValidTime for the current model in epoch Args: params_dict (dict): contains named_function parameters @@ -947,7 +947,7 @@ def handle_time(self, params_dict): # pylint: disable=unused-argument """ return self.model_data["fcstValidEpoch"] - def handle_iso_time(self, params_dict): # pylint: disable=unused-argument + def handle_iso_time(self, params_dict): """return the fcstValidTime for the current model in ISO Args: params_dict (dict): contains named_function parameters @@ -958,7 +958,7 @@ def handle_iso_time(self, params_dict): # pylint: disable=unused-argument self.model_data["fcstValidEpoch"] ).isoformat() - def handle_fcst_len(self, params_dict): # pylint: disable=unused-argument + def handle_fcst_len(self, params_dict): """returns the fcst lead time in hours for this document Args: params_dict (dict): contains named_function parameters @@ -967,7 +967,7 @@ def handle_fcst_len(self, params_dict): # pylint: disable=unused-argument """ return self.model_data["fcstLen"] - def handleWindDirU(self, params_dict): # pylint: disable=unused-argument, invalid-name + def handleWindDirU(self, params_dict): """returns the wind direction U component for this document Args: params_dict (dict): contains named_function parameters @@ -976,7 +976,7 @@ def handleWindDirU(self, params_dict): # pylint: disable=unused-argument, inval """ return self.model_data["windDirU"] - def handleWindDirV(self, params_dict): # pylint: disable=unused-argument, invalid-name + def handleWindDirV(self, params_dict): """returns the wind direction V component for this document Args: params_dict (dict): contains named_function parameters @@ -985,7 +985,7 @@ def handleWindDirV(self, params_dict): # pylint: disable=unused-argument, inval """ return self.model_data["windDirV"] - def handle_specific_humidity(self, params_dict): # pylint: disable=unused-argument + def handle_specific_humidity(self, params_dict): """returns the specific humidity for this document Args: params_dict (dict): contains named_function parameters diff --git a/src/vxingest/partial_sums_to_cb/run_ingest_threads.py b/src/vxingest/partial_sums_to_cb/run_ingest_threads.py index 9f19a71..4100a5a 100644 --- a/src/vxingest/partial_sums_to_cb/run_ingest_threads.py +++ b/src/vxingest/partial_sums_to_cb/run_ingest_threads.py @@ -163,7 +163,7 @@ def __init__(self): self.ingest_document = None super().__init__() - def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): # pylint:disable=too-many-locals + def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]): """ This is the entry point for run_ingest_threads.py """ @@ 
-234,7 +234,6 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ) logger.info(f"Starting {self.thread_count} processes") for thread_count in range(int(self.thread_count)): - # noinspection PyBroadException try: ingest_manager_thread = VxIngestManager( f"VxIngestManager-{thread_count+1}", # Processes are 1 indexed in the logger @@ -247,7 +246,7 @@ def runit(self, args, log_queue: Queue, log_configurer: Callable[[Queue], None]) ingest_manager_list.append(ingest_manager_thread) ingest_manager_thread.start() logger.info(f"Started thread: VxIngestManager-{thread_count+1}") - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.error("*** Error in VXIngest %s***", str(_e)) # be sure to join all the threads to wait on them finished = [proc.join() for proc in ingest_manager_list] @@ -283,7 +282,7 @@ def main(self): # Tell the logging thread to finish up, too log_queue_listener.stop() sys.exit(0) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.info("*** FINISHED with exception %s***", str(_e)) # Tell the logging thread to finish up, too log_queue_listener.stop() diff --git a/src/vxingest/partial_sums_to_cb/vx_ingest_manager.py b/src/vxingest/partial_sums_to_cb/vx_ingest_manager.py index 517c28b..9fb63f3 100644 --- a/src/vxingest/partial_sums_to_cb/vx_ingest_manager.py +++ b/src/vxingest/partial_sums_to_cb/vx_ingest_manager.py @@ -42,7 +42,7 @@ logger = logging.getLogger(__name__) -class VxIngestManager(CommonVxIngestManager): # pylint:disable=too-many-instance-attributes +class VxIngestManager(CommonVxIngestManager): """ IngestManager is a Process Thread that manages an object pool of builders to ingest data from GSD couchbase documents, producing new documents @@ -117,7 +117,7 @@ def set_builder_name(self, queue_element): self.ingest_type_builder_name = self.load_spec["ingest_documents"][ queue_element ]["builder_type"] - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: process_element: Exception getting ingest document for %s", self.thread_name, @@ -133,22 +133,22 @@ def process_queue_element(self, queue_element): _e: exception """ # get or instantiate the builder - # noinspection PyBroadException + start_process_time = int(time.time()) document_map = {} - # noinspection PyBroadException + try: logger.info("process_element - : start time: %s", str(start_process_time)) try: self.set_builder_name(queue_element) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: *** Error in IngestManager run getting builder name ***", self.thread_name, ) sys.exit("*** Error getting builder name: ") - if self.ingest_type_builder_name in self.builder_map.keys(): + if self.ingest_type_builder_name in self.builder_map: builder = self.builder_map[self.ingest_type_builder_name] else: builder_class = getattr(my_builder, self.ingest_type_builder_name) @@ -165,7 +165,7 @@ def process_queue_element(self, queue_element): else: logger.info("writing document map for %s to database", queue_element) self.write_document_to_cb(queue_element, document_map) - except Exception as _e: # pylint:disable=broad-except + except Exception as _e: logger.exception( "%s: Exception in builder: %s", self.thread_name, diff --git a/src/vxingest/utilities/backfill_obs_with_rh.py b/src/vxingest/utilities/backfill_obs_with_rh.py index 93b0b1e..8495a92 100755 --- a/src/vxingest/utilities/backfill_obs_with_rh.py +++ 
b/src/vxingest/utilities/backfill_obs_with_rh.py @@ -64,7 +64,7 @@ def calc_components(doc): ) else: station["RH"] = None - if "WindU" not in station or "WindV" not in station.keys(): + if "WindU" not in station or "WindV" not in station: if station["WS"] is not None and station["WD"] is not None: _u, _v = wind_components( station["WS"] * units("m/s"), station["WD"] * units.deg diff --git a/tests/vxingest/builder_common/test_bc_builder_utilities.py b/tests/vxingest/builder_common/test_bc_builder_utilities.py index 5ada31b..f312412 100644 --- a/tests/vxingest/builder_common/test_bc_builder_utilities.py +++ b/tests/vxingest/builder_common/test_bc_builder_utilities.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import pytest from vxingest.builder_common.builder_utilities import ( convert_to_iso, @@ -50,11 +49,14 @@ def test_convert_to_iso(): assert convert_to_iso(1627976400) == "2021-08-03T07:40:00Z" assert convert_to_iso("1627976400") == "2021-08-03T07:40:00Z" assert convert_to_iso(1627976400.123) == "2021-08-03T07:40:00Z" - with pytest.raises(ValueError): + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): convert_to_iso("1627976400.123") - with pytest.raises(ValueError): - v = convert_to_iso("not_an_epoch") - assert v == "1970-00-00T00:00:00Z" + with pytest.raises( + ValueError, match=r"invalid literal for int\(\) with base 10: .*" + ): + assert convert_to_iso("not_an_epoch") == "1970-00-00T00:00:00Z" def test_truncate_round(): diff --git a/tests/vxingest/builder_common/test_unit_queries.py b/tests/vxingest/builder_common/test_unit_queries.py index 1beac55..705fcec 100644 --- a/tests/vxingest/builder_common/test_unit_queries.py +++ b/tests/vxingest/builder_common/test_unit_queries.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import os from datetime import timedelta from pathlib import Path @@ -13,131 +12,101 @@ def connect_cb(): """ create a couchbase connection and maintain the collection and cluster objects. """ - # noinspection PyBroadException - try: - try: - cb_connection # pylint: disable=used-before-assignment - except NameError: - credentials_file = os.environ["CREDENTIALS"] - assert ( - Path(credentials_file).is_file() is True - ), f"*** credentials_file file {credentials_file} can not be found!" - _f = open(credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection = {} - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() + credentials_file = os.environ["CREDENTIALS"] + assert ( + Path(credentials_file).is_file() is True + ), f"*** credentials_file file {credentials_file} can not be found!" 
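# --- Illustrative sketch (not from this diff): the credentials handling used by
# these tests. The real code reads the path from os.environ["CREDENTIALS"]; a
# throwaway YAML file stands in here so the sketch runs on its own.
import tempfile
from pathlib import Path

import yaml

with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as tmp:
    tmp.write("cb_host: localhost\ncb_user: avid\ncb_password: secret\n")
    credentials_file = tmp.name

assert Path(credentials_file).is_file(), f"{credentials_file} can not be found!"
with Path(credentials_file).open(encoding="utf-8") as _f:
    _yaml_data = yaml.load(_f, yaml.SafeLoader)  # same as yaml.safe_load(_f)
print(_yaml_data["cb_host"], _yaml_data["cb_user"])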
+ with Path(credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) + cb_connection = {} + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster("couchbase://" + cb_connection["host"], options) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_stations_fcst_valid_epoch(request): - """test""" - try: - _expected_time = 10 - _name = request.node.name - testdata = Path( - "tests/vxingest/builder_common/testdata/stations_fcst_valid_epoch.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, f"{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _expected_time = 10 + _name = request.node.name + testdata = Path( + "tests/vxingest/builder_common/testdata/stations_fcst_valid_epoch.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, f"{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_stations_get_file_list_grib2(request): - """test""" - try: - _expected_time = 10 - _name = request.node.name - testdata = Path( - "tests/vxingest/builder_common/testdata/get_file_list_grib2.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = 
connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, f"{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _expected_time = 10 + _name = request.node.name + testdata = Path("tests/vxingest/builder_common/testdata/get_file_list_grib2.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, f"{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_stations_get_file_list_netcdf(request): - """test""" - try: - _expected_time = 5 - _name = request.node.name - testdata = Path( - "tests/vxingest/builder_common/testdata/get_file_list_netcdf.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, f"{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _expected_time = 5 + _name = request.node.name + testdata = Path("tests/vxingest/builder_common/testdata/get_file_list_netcdf.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, f"{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_metar_count(request): - """test""" - try: - _expected_time = 0.05 - _name = request.node.name - testdata = Path("tests/vxingest/builder_common/testdata/METAR_count.n1ql") - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, f"{_name}: result is None" - 
assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _expected_time = 0.05 + _name = request.node.name + testdata = Path("tests/vxingest/builder_common/testdata/METAR_count.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, f"{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" diff --git a/tests/vxingest/ctc_to_cb/test_int_metar_ctc.py b/tests/vxingest/ctc_to_cb/test_int_metar_ctc.py index 3365242..3f9833b 100644 --- a/tests/vxingest/ctc_to_cb/test_int_metar_ctc.py +++ b/tests/vxingest/ctc_to_cb/test_int_metar_ctc.py @@ -1,8 +1,6 @@ -# pylint: disable=too-many-lines """ test for VxIngest CTC builders """ -import glob import json import os import time @@ -10,6 +8,7 @@ from multiprocessing import Queue from pathlib import Path +import pytest import yaml from couchbase.auth import PasswordAuthenticator from couchbase.cluster import Cluster @@ -41,49 +40,45 @@ def test_check_fcst_valid_epoch_fcst_valid_iso(): """ integration test to check fcst_valid_epoch is derived correctly """ - try: - credentials_file = os.environ["CREDENTIALS"] - assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf-8") + credentials_file = os.environ["CREDENTIALS"] + assert Path(credentials_file).is_file(), "credentials_file Does not exist" + with Path(credentials_file).open(encoding="utf-8") as _f: yaml_data = yaml.load(_f, yaml.SafeLoader) - _host = yaml_data["cb_host"] - _user = yaml_data["cb_user"] - _password = yaml_data["cb_password"] - _bucket = yaml_data["cb_bucket"] - _collection = yaml_data["cb_collection"] - _scope = yaml_data["cb_scope"] - _f.close() - - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(_user, _password), timeout_options=timeout_options - ) - cluster = Cluster("couchbase://" + _host, options) - options = ClusterOptions(PasswordAuthenticator(_user, _password)) - cluster = Cluster("couchbase://" + _host, options) - stmnt = f"""SELECT m0.fcstValidEpoch fve, fcstValidISO fvi - FROM `{_bucket}`.{_scope}.{_collection} m0 - WHERE - m0.type='DD' - AND m0.docType='CTC' - AND m0.subset='{_collection}' - AND m0.version='V01' - AND m0.model='HRRR_OPS' - AND m0.region='ALL_HRRR' - """ - result = cluster.query(stmnt) - for row in result: - fve = row["fve"] - utc_time = datetime.strptime(row["fvi"], "%Y-%m-%dT%H:%M:%S") - epoch_time = int((utc_time - datetime(1970, 1, 1)).total_seconds()) - assert ( - fve == epoch_time - ), "fcstValidEpoch and fcstValidIso are not the same time" - assert (fve % 3600) == 0, "fcstValidEpoch is not at top of hour" - except Exception as _e: # pylint: disable=broad-except, disable=broad-except - assert False, f"TestGsdIngestManager.test_check_fcstValidEpoch_fcstValidIso Exception failure: {_e}" + _host = yaml_data["cb_host"] + _user = yaml_data["cb_user"] + 
_password = yaml_data["cb_password"] + _bucket = yaml_data["cb_bucket"] + _collection = yaml_data["cb_collection"] + _scope = yaml_data["cb_scope"] + + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(_user, _password), timeout_options=timeout_options + ) + cluster = Cluster("couchbase://" + _host, options) + options = ClusterOptions(PasswordAuthenticator(_user, _password)) + cluster = Cluster("couchbase://" + _host, options) + stmnt = f"""SELECT m0.fcstValidEpoch fve, fcstValidISO fvi + FROM `{_bucket}`.{_scope}.{_collection} m0 + WHERE + m0.type='DD' + AND m0.docType='CTC' + AND m0.subset='{_collection}' + AND m0.version='V01' + AND m0.model='HRRR_OPS' + AND m0.region='ALL_HRRR' + """ + result = cluster.query(stmnt) + for row in result: + fve = row["fve"] + utc_time = datetime.strptime(row["fvi"], "%Y-%m-%dT%H:%M:%S") + epoch_time = int((utc_time - datetime(1970, 1, 1)).total_seconds()) + assert ( + fve == epoch_time + ), "fcstValidEpoch and fcstValidIso are not the same time" + assert (fve % 3600) == 0, "fcstValidEpoch is not at top of hour" def test_get_stations_geo_search(): @@ -91,93 +86,85 @@ def test_get_stations_geo_search(): Currently we know that there are differences between the geo search stations list and the legacy stations list. This test does show those differences. The assertion is commented out. """ - try: - credentials_file = os.environ["CREDENTIALS"] - assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf-8") + credentials_file = os.environ["CREDENTIALS"] + assert Path(credentials_file).is_file(), "credentials_file Does not exist" + with Path(credentials_file).open(encoding="utf-8") as _f: yaml_data = yaml.load(_f, yaml.SafeLoader) - _host = yaml_data["cb_host"] - _user = yaml_data["cb_user"] - _password = yaml_data["cb_password"] - _bucket = yaml_data["cb_bucket"] - _collection = yaml_data["cb_collection"] - _scope = yaml_data["cb_scope"] - _f.close() - - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(_user, _password), timeout_options=timeout_options + _host = yaml_data["cb_host"] + _user = yaml_data["cb_user"] + _password = yaml_data["cb_password"] + _bucket = yaml_data["cb_bucket"] + _collection = yaml_data["cb_collection"] + _scope = yaml_data["cb_scope"] + + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(_user, _password), timeout_options=timeout_options + ) + cluster = Cluster("couchbase://" + _host, options) + collection = cluster.bucket(_bucket).scope(_scope).collection(_collection) + load_spec = {} + load_spec["cluster"] = cluster + load_spec["collection"] = collection + load_spec["ingest_document_ids"] = [ + f"MD:V01:{_collection}:HRRR_OPS:ALL_HRRR:CTC:CEILING:ingest" + ] + # get the ingest document id. 
+ ingest_document_result = collection.get( + f"MD-TEST:V01:{_collection}:HRRR_OPS:ALL_HRRR:CTC:CEILING:ingest" + ) + ingest_document = ingest_document_result.content_as[dict] + # instantiate a ctcBuilder so we can use its get_station methods + builder_class = ctc_builder.CTCModelObsBuilderV01 + builder = builder_class(load_spec, ingest_document) + # usually these would get assigned in build_document + builder.bucket = _bucket + builder.scope = _scope + builder.collection = _collection + builder.subset = _collection + + result = cluster.query( + f""" + SELECT name, + geo.bottom_right.lat AS br_lat, + geo.bottom_right.lon AS br_lon, + geo.top_left.lat AS tl_lat, + geo.top_left.lon AS tl_lon + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='MD' + AND docType='region' + AND subset='COMMON' + AND version='V01' + """ + ) + for row in result: + # use the builder geosearch to get the station list - just use current epoch + stations = sorted( + builder.get_stations_for_region_by_sort(row["name"], round(time.time())) ) - cluster = Cluster("couchbase://" + _host, options) - collection = cluster.bucket(_bucket).scope(_scope).collection(_collection) - load_spec = {} - load_spec["cluster"] = cluster - load_spec["collection"] = collection - load_spec["ingest_document_ids"] = [ - f"MD:V01:{_collection}:HRRR_OPS:ALL_HRRR:CTC:CEILING:ingest" + # get the legacy station list from the test document (this came from mysql) + classic_stations = builder.get_legacy_stations_for_region(row["name"]) + stations_difference = [ + i + for i in classic_stations + stations + if i not in classic_stations or i not in stations ] - # get the ingest document id. - ingest_document_result = collection.get( - f"MD-TEST:V01:{_collection}:HRRR_OPS:ALL_HRRR:CTC:CEILING:ingest" - ) - ingest_document = ingest_document_result.content_as[dict] - # instantiate a ctcBuilder so we can use its get_station methods - builder_class = ctc_builder.CTCModelObsBuilderV01 - builder = builder_class(load_spec, ingest_document) - # usually these would get assigned in build_document - builder.bucket = _bucket - builder.scope = _scope - builder.collection = _collection - builder.subset = _collection - - result = cluster.query( - f""" - SELECT name, - geo.bottom_right.lat AS br_lat, - geo.bottom_right.lon AS br_lon, - geo.top_left.lat AS tl_lat, - geo.top_left.lon AS tl_lon - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='MD' - AND docType='region' - AND subset='COMMON' - AND version='V01' - """ + print( + "region " + + row["name"] + + "difference length is " + + str(len(stations_difference)) + + " stations symmetric_difference is " + + str(stations_difference) ) - for row in result: - # use the builder geosearch to get the station list - just use current epoch - stations = sorted( # pylint: disable=redefined-outer-name - # builder.get_stations_for_region_by_geosearch(row["name"],round(time.time())) - builder.get_stations_for_region_by_sort(row["name"], round(time.time())) - ) - # get the legacy station list from the test document (this came from mysql) - # classic_station_id = "MD-TEST:V01:CLASSIC_STATIONS:" + row["name"] - # doc = collection.get(classic_station_id.strip()) - # classic_stations = sorted(doc.content_as[dict]["stations"]) - classic_stations = builder.get_legacy_stations_for_region(row["name"]) - stations_difference = [ - i - for i in classic_stations + stations - if i not in classic_stations or i not in stations - ] - print( - "region " - + row["name"] - + "difference length is " - + str(len(stations_difference)) - + " 
stations symmetric_difference is " - + str(stations_difference) - ) - assert ( - len(stations_difference) < 1000 - ), "difference between expected and actual greater than 100" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + assert ( + len(stations_difference) < 1000 + ), "difference between expected and actual greater than 100" -def calculate_cb_ctc( # pylint: disable=dangerous-default-value,missing-function-docstring +def calculate_cb_ctc( epoch, fcst_len, threshold, @@ -185,22 +172,24 @@ def calculate_cb_ctc( # pylint: disable=dangerous-default-value,missing-functio subset, region, doc_sub_type, - reject_stations=[], + reject_stations=None, ): - global cb_model_obs_data # pylint: disable=global-statement - global stations # pylint: disable=global-statement + if reject_stations is None: + reject_stations = [] + + global cb_model_obs_data + global stations credentials_file = os.environ["CREDENTIALS"] assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf-8") - yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(credentials_file).open(encoding="utf-8") as _f: + yaml_data = yaml.load(_f, yaml.SafeLoader) _host = yaml_data["cb_host"] _user = yaml_data["cb_user"] _password = yaml_data["cb_password"] _bucket = yaml_data["cb_bucket"] _collection = yaml_data["cb_collection"] _scope = yaml_data["cb_scope"] - _f.close() timeout_options = ClusterTimeoutOptions( kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) @@ -225,34 +214,31 @@ def calculate_cb_ctc( # pylint: disable=dangerous-default-value,missing-functio builder.scope = _scope builder.collection = _collection builder.subset = _collection - legacy_stations = sorted( - # builder.get_stations_for_region_by_geosearch(region, epoch) - builder.get_stations_for_region_by_sort(region, epoch) - ) + legacy_stations = sorted(builder.get_stations_for_region_by_sort(region, epoch)) obs_id = f"DD:V01:{subset}:obs:{epoch}" - stations = sorted( # pylint: disable=redefined-outer-name + stations = sorted( [station for station in legacy_stations if station not in reject_stations] ) model_id = f"DD:V01:{subset}:{model}:{epoch}:{fcst_len}" print("cb_ctc model_id:", model_id, " obs_id:", obs_id) try: full_model_data = load_spec["collection"].get(model_id).content_as[dict] - except: # pylint: disable=bare-except + except Exception: time.sleep(0.25) full_model_data = load_spec["collection"].get(model_id).content_as[dict] - cb_model_obs_data = [] # pylint: disable=redefined-outer-name + cb_model_obs_data = [] try: full_obs_data = load_spec["collection"].get(obs_id).content_as[dict] - except: # pylint: disable=bare-except + except Exception: time.sleep(0.25) full_obs_data = load_spec["collection"].get(obs_id).content_as[dict] for station in stations: # find observation data for this station - if station not in full_obs_data["data"].keys(): + if station not in full_obs_data["data"]: continue obs_data = full_obs_data["data"][station] # find model data for this station - if station not in full_model_data["data"].keys(): + if station not in full_model_data["data"]: continue model_data = full_model_data["data"][station] # add to model_obs_data @@ -299,7 +285,7 @@ def calculate_cb_ctc( # pylint: disable=dangerous-default-value,missing-functio return ctc -def test_ctc_builder_ceiling_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals +def test_ctc_builder_ceiling_hrrr_ops_all_hrrr(): """ This test verifies that 
data is returned for each fcstLen and each threshold. It can be used to debug the builder by putting a specific epoch for first_epoch. @@ -309,98 +295,88 @@ def test_ctc_builder_ceiling_hrrr_ops_all_hrrr(): # pylint: disable=too-many-lo It calculates the CTC using couchbase data for input. Then the couchbase CTC fcstValidEpochs are compared and asserted against the derived CTC. """ - # noinspection PyBroadException - global cb_model_obs_data # pylint: disable=global-variable-not-assigned - global stations # pylint: disable=global-variable-not-assigned - try: - credentials_file = os.environ["CREDENTIALS"] - job_id = "JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" - outdir = "/opt/data/ctc_to_cb/hrrr_ops/ceiling/output" - if not os.path.exists(outdir): - # Create a new directory because it does not exist - os.makedirs(outdir) - filepaths = outdir + "/*.json" - files = glob.glob(filepaths) - for _f in files: - try: - os.remove(_f) - except OSError as _e: - assert False, f"Error: {_e}" - log_queue = Queue() - vx_ingest = VXIngest() - # These CTC's might already have been ingested in which case this won't do anything. - vx_ingest.runit( - { - "job_id": job_id, - "credentials_file": credentials_file, - "output_dir": outdir, - "threads": 1, - "first_epoch": 1638489600, - "last_epoch": 1638496800, - }, - log_queue, - stub_worker_log_configurer, - ) + global cb_model_obs_data + global stations - list_of_output_files = glob.glob(outdir + "/*") - # latest_output_file = max(list_of_output_files, key=os.path.getctime) - latest_output_file = min(list_of_output_files, key=os.path.getctime) - try: - # Opening JSON file - output_file = open(latest_output_file, encoding="utf8") - # returns JSON object as a dictionary - vx_ingest_output_data = json.load(output_file) - # if this is an LJ document then the CTC's were already ingested - # and the test should stop here - if vx_ingest_output_data[0]["type"] == "LJ": - return - # get the last fcstValidEpochs - fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data} - # take a fcstValidEpoch in the middle of the list - fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)] - _thresholds = ["500", "1000", "3000", "60000"] - # get all the documents that have the chosen fcstValidEpoch - docs = [ - _doc - for _doc in vx_ingest_output_data - if _doc["fcstValidEpoch"] == fcst_valid_epoch - ] - # get all the fcstLens for those docs - fcst_lens = [] - for _elem in docs: - fcst_lens.append(_elem["fcstLen"]) - output_file.close() - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure opening output: {_e}" - for _i in fcst_lens: - _elem = None - # find the document for this fcst_len - for _elem in docs: - if _elem["fcstLen"] == _i: - break - # process all the thresholds - for _t in _thresholds: - print( - f"Asserting derived CTC for fcstValidEpoch: {_elem['fcstValidEpoch']} model: HRRR_OPS region: ALL_HRRR fcst_len: {_i} threshold: {_t}" - ) - cb_ctc = calculate_cb_ctc( - epoch=_elem["fcstValidEpoch"], - fcst_len=_i, - threshold=int(_t), - model="HRRR_OPS", - subset="METAR", - doc_sub_type="Ceiling", - region="ALL_HRRR", - ) - if cb_ctc is None: - print(f"cb_ctc is None for threshold {str(_t)}- contunuing") - continue - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure: {_e}" - - -def test_ctc_builder_visibility_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals + credentials_file = os.environ["CREDENTIALS"] + job_id = 
"JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" + outdir = Path("/opt/data/ctc_to_cb/hrrr_ops/ceiling/output") + if not outdir.exists(): + # Create a new directory because it does not exist + outdir.mkdir(parents=True) + files = outdir.glob("*.json") + for _f in files: + Path(_f).unlink() + log_queue = Queue() + vx_ingest = VXIngest() + # These CTC's might already have been ingested in which case this won't do anything. + vx_ingest.runit( + { + "job_id": job_id, + "credentials_file": credentials_file, + "output_dir": str(outdir), + "threads": 1, + "first_epoch": 1638489600, + "last_epoch": 1638496800, + }, + log_queue, + stub_worker_log_configurer, + ) + + list_of_output_files = outdir.glob("*") + latest_output_file = min(list_of_output_files, key=os.path.getctime) + + # Opening JSON file + with latest_output_file.open(encoding="utf8") as output_file: + # returns JSON object as a dictionary + vx_ingest_output_data = json.load(output_file) + # if this is an LJ document then the CTC's were already ingested + # and the test should stop here + if vx_ingest_output_data[0]["type"] == "LJ": + return + # get the last fcstValidEpochs + fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data} + # take a fcstValidEpoch in the middle of the list + fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)] + _thresholds = ["500", "1000", "3000", "60000"] + # get all the documents that have the chosen fcstValidEpoch + docs = [ + _doc + for _doc in vx_ingest_output_data + if _doc["fcstValidEpoch"] == fcst_valid_epoch + ] + # get all the fcstLens for those docs + fcst_lens = [] + for _elem in docs: + fcst_lens.append(_elem["fcstLen"]) + + for _i in fcst_lens: + _elem = None + # find the document for this fcst_len + for _elem in docs: + if _elem["fcstLen"] == _i: + break + # process all the thresholds + for _t in _thresholds: + print( + f"Asserting derived CTC for fcstValidEpoch: {_elem['fcstValidEpoch']} model: HRRR_OPS region: ALL_HRRR fcst_len: {_i} threshold: {_t}" + ) + cb_ctc = calculate_cb_ctc( + epoch=_elem["fcstValidEpoch"], + fcst_len=_i, + threshold=int(_t), + model="HRRR_OPS", + subset="METAR", + doc_sub_type="Ceiling", + region="ALL_HRRR", + ) + if cb_ctc is None: + print(f"cb_ctc is None for threshold {str(_t)}- contunuing") + continue + + +def test_ctc_builder_visibility_hrrr_ops_all_hrrr(): """ This test verifies that data is returned for each fcstLen and each threshold. It can be used to debug the builder by putting a specific epoch for first_epoch. @@ -410,100 +386,88 @@ def test_ctc_builder_visibility_hrrr_ops_all_hrrr(): # pylint: disable=too-many It calculates the CTC using couchbase data for input. Then the couchbase CTC fcstValidEpochs are compared and asserted against the derived CTC. """ - # noinspection PyBroadException - global cb_model_obs_data # pylint: disable=global-variable-not-assigned - global stations # pylint: disable=global-variable-not-assigned - try: - credentials_file = os.environ["CREDENTIALS"] - job_id = "JOB-TEST:V01:METAR:CTC:VISIBILITY:MODEL:OPS" - outdir = "/opt/data/ctc_to_cb/hrrr_ops/visibility/output" - if not os.path.exists(outdir): - # Create a new directory because it does not exist - os.makedirs(outdir) - filepaths = outdir + "/*.json" - files = glob.glob(filepaths) - for _f in files: - try: - os.remove(_f) - except OSError as _e: - assert False, f"Error: {_e}" - log_queue = Queue() - - vx_ingest = VXIngest() - # These CTC's might already have been ingested in which case this won't do anything. 
- vx_ingest.runit( - { - "job_id": job_id, - "credentials_file": credentials_file, - "output_dir": outdir, - "threads": 1, - "first_epoch": 1638489600, - "last_epoch": 1638496800, - }, - log_queue, - stub_worker_log_configurer, - ) + global cb_model_obs_data + global stations - list_of_output_files = glob.glob(outdir + "/*") - # latest_output_file = max(list_of_output_files, key=os.path.getctime) - latest_output_file = min(list_of_output_files, key=os.path.getctime) - try: - # Opening JSON file - output_file = open(latest_output_file, encoding="utf8") - # returns JSON object as a dictionary - vx_ingest_output_data = json.load(output_file) - # if this is an LJ document then the CTC's were already ingested - # and the test should stop here - if vx_ingest_output_data[0]["type"] == "LJ": - return - # get the last fcstValidEpochs - fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data} - # take a fcstValidEpoch in the middle of the list - fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)] - _thresholds = ["0.5", "1.0", "3.0", "5.0", "10.0"] - # get all the documents that have the chosen fcstValidEpoch - docs = [ - _doc - for _doc in vx_ingest_output_data - if _doc["fcstValidEpoch"] == fcst_valid_epoch - ] - # get all the fcstLens for those docs - fcst_lens = [] - for _elem in docs: - fcst_lens.append(_elem["fcstLen"]) - output_file.close() - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure opening output: {_e}" - for _i in fcst_lens: - _elem = None - # find the document for this fcst_len - for _elem in docs: - if _elem["fcstLen"] == _i: - break - # process all the thresholds - for _threshold in _thresholds: - print( - f"Asserting derived CTC for fcstValidEpoch: {_elem['fcstValidEpoch']} model: HRRR_OPS region: ALL_HRRR fcst_len: {_i} threshold: {_threshold}" - ) - cb_ctc = calculate_cb_ctc( - epoch=_elem["fcstValidEpoch"], - fcst_len=_i, - threshold=float(_threshold), - model="HRRR_OPS", - subset="METAR", - doc_sub_type="Visibility", - region="ALL_HRRR", - ) - if cb_ctc is None: - print(f"cb_ctc is None for threshold {str(_threshold)}- contunuing") - continue - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure: {_e}" - - -def test_ctc_ceiling_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals - # noinspection PyBroadException + credentials_file = os.environ["CREDENTIALS"] + job_id = "JOB-TEST:V01:METAR:CTC:VISIBILITY:MODEL:OPS" + outdir = Path("/opt/data/ctc_to_cb/hrrr_ops/visibility/output") + if not outdir.exists(): + # Create a new directory because it does not exist + outdir.mkdir(parents=True) + files = outdir.glob("*.json") + for _f in files: + _f.unlink() + log_queue = Queue() + + vx_ingest = VXIngest() + # These CTC's might already have been ingested in which case this won't do anything. 
+    vx_ingest.runit(
+        {
+            "job_id": job_id,
+            "credentials_file": credentials_file,
+            "output_dir": str(outdir),
+            "threads": 1,
+            "first_epoch": 1638489600,
+            "last_epoch": 1638496800,
+        },
+        log_queue,
+        stub_worker_log_configurer,
+    )
+
+    list_of_output_files = outdir.glob("*")
+    latest_output_file = min(list_of_output_files, key=os.path.getctime)
+    # Opening JSON file
+    with latest_output_file.open(encoding="utf8") as output_file:
+        # returns JSON object as a dictionary
+        vx_ingest_output_data = json.load(output_file)
+        # if this is an LJ document then the CTCs were already ingested
+        # and the test should stop here
+        if vx_ingest_output_data[0]["type"] == "LJ":
+            return
+        # get the unique fcstValidEpochs
+        fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data}
+        # take a fcstValidEpoch in the middle of the list
+        fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)]
+        _thresholds = ["0.5", "1.0", "3.0", "5.0", "10.0"]
+        # get all the documents that have the chosen fcstValidEpoch
+        docs = [
+            _doc
+            for _doc in vx_ingest_output_data
+            if _doc["fcstValidEpoch"] == fcst_valid_epoch
+        ]
+        # get all the fcstLens for those docs
+        fcst_lens = []
+        for _elem in docs:
+            fcst_lens.append(_elem["fcstLen"])
+
+    for _i in fcst_lens:
+        _elem = None
+        # find the document for this fcst_len
+        for _elem in docs:
+            if _elem["fcstLen"] == _i:
+                break
+        # process all the thresholds
+        for _threshold in _thresholds:
+            print(
+                f"Asserting derived CTC for fcstValidEpoch: {_elem['fcstValidEpoch']} model: HRRR_OPS region: ALL_HRRR fcst_len: {_i} threshold: {_threshold}"
+            )
+            cb_ctc = calculate_cb_ctc(
+                epoch=_elem["fcstValidEpoch"],
+                fcst_len=_i,
+                threshold=float(_threshold),
+                model="HRRR_OPS",
+                subset="METAR",
+                doc_sub_type="Visibility",
+                region="ALL_HRRR",
+            )
+            if cb_ctc is None:
+                print(f"cb_ctc is None for threshold {str(_threshold)} - continuing")
+                continue
+
+
+def test_ctc_ceiling_data_hrrr_ops_all_hrrr():
     """
     This test is a comprehensive test of the ctcBuilder data. It will retrieve CTC documents
     for a specific fcstValidEpoch from couchbase and calculate the CTC's for the same fcstValidEpoch.
@@ -514,15 +478,14 @@ def test_ctc_ceiling_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-local credentials_file = os.environ["CREDENTIALS"] assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf8") - yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(credentials_file).open(encoding="utf-8") as _f: + yaml_data = yaml.load(_f, yaml.SafeLoader) _host = yaml_data["cb_host"] _user = yaml_data["cb_user"] _password = yaml_data["cb_password"] _bucket = yaml_data["cb_bucket"] _collection = yaml_data["cb_collection"] _scope = yaml_data["cb_scope"] - _f.close() timeout_options = ClusterTimeoutOptions( kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) @@ -532,116 +495,111 @@ def test_ctc_ceiling_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-local ) cluster = Cluster("couchbase://" + _host, options) # get available fcstValidEpochs for couchbase - try: - result = cluster.query( - f"""SELECT RAW fcstValidEpoch - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type="DD" - AND docType="CTC" - AND subDocType = "CEILING" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}'""" - ) - cb_fcst_valid_epochs = list(result) - if len(cb_fcst_valid_epochs) == 0: - assert False, "There is no data" - # choose the last one - # fcst_valid_epoch = cb_fcst_valid_epochs[-1] - fcst_valid_epoch = cb_fcst_valid_epochs[round(len(cb_fcst_valid_epochs) / 2)] - # get all the cb fcstLen values - result = cluster.query( - f"""SELECT raw fcstLen - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "CEILING" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - order by fcstLen - """ - ) - cb_fcst_valid_lens = list(result) - # get the thesholdDescriptions from the couchbase metadata - # result = cluster.query( - # f""" - # SELECT RAW thresholdDescriptions.ceiling - # FROM `{_bucket}`.{_scope}.{_collection} - # WHERE type="MD" - # AND docType="matsAux" - # """, - # read_only=True, - # ) - # get the associated couchbase ceiling model data - # get the associated couchbase obs - # get the ctc couchbase data - result = cluster.query( - f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "CEILING" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {cb_fcst_valid_lens} - order by fcstLen; - """ - ) - cb_results = list(result) - # print the couchbase statement - print( - "cb statement is:" - + f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "CEILING" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {cb_fcst_valid_lens} - order by fcstLen;""" - ) - for _cb_ctc in cb_results: - fcstln = _cb_ctc["METAR"]["fcstLen"] - for _threshold in _cb_ctc["METAR"]["data"].keys(): - _ctc = calculate_cb_ctc( - fcst_valid_epoch, - fcstln, - float(_threshold), - "HRRR_OPS", - _collection, - doc_sub_type="Ceiling", - region="ALL_HRRR", - ) - # assert ctc values - fields = ["hits", "misses", "false_alarms", "correct_negatives"] - for field in fields: - _ctc_value = _ctc[field] - _cb_ctc_value = _cb_ctc[_collection]["data"][_threshold][field] - assert 
_ctc_value == _cb_ctc_value, f""" - For epoch : {_ctc['fcst_valid_epoch']} - and fstLen: {_ctc['fcst_len']} - and threshold: {_threshold} - the derived CTC {field}: {_ctc_value} and caclulated CTC {field}: {_cb_ctc_value} values do not match""" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure: {_e}" - return - - -def test_ctc_visibiltiy_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals - # noinspection PyBroadException + + result = cluster.query( + f"""SELECT RAW fcstValidEpoch + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type="DD" + AND docType="CTC" + AND subDocType = "CEILING" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}'""" + ) + cb_fcst_valid_epochs = list(result) + if len(cb_fcst_valid_epochs) == 0: + pytest.fail("There is no data") + # choose the last one + fcst_valid_epoch = cb_fcst_valid_epochs[round(len(cb_fcst_valid_epochs) / 2)] + # get all the cb fcstLen values + result = cluster.query( + f"""SELECT raw fcstLen + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "CTC" + AND subDocType = "CEILING" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + order by fcstLen + """ + ) + cb_fcst_valid_lens = list(result) + # get the thesholdDescriptions from the couchbase metadata + # result = cluster.query( + # f""" + # SELECT RAW thresholdDescriptions.ceiling + # FROM `{_bucket}`.{_scope}.{_collection} + # WHERE type="MD" + # AND docType="matsAux" + # """, + # read_only=True, + # ) + # get the associated couchbase ceiling model data + # get the associated couchbase obs + # get the ctc couchbase data + result = cluster.query( + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "CTC" + AND subDocType = "CEILING" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {cb_fcst_valid_lens} + order by fcstLen; + """ + ) + cb_results = list(result) + # print the couchbase statement + print( + "cb statement is:" + + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "CTC" + AND subDocType = "CEILING" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {cb_fcst_valid_lens} + order by fcstLen;""" + ) + for _cb_ctc in cb_results: + fcstln = _cb_ctc["METAR"]["fcstLen"] + for _threshold in _cb_ctc["METAR"]["data"]: + _ctc = calculate_cb_ctc( + fcst_valid_epoch, + fcstln, + float(_threshold), + "HRRR_OPS", + _collection, + doc_sub_type="Ceiling", + region="ALL_HRRR", + ) + # assert ctc values + fields = ["hits", "misses", "false_alarms", "correct_negatives"] + for field in fields: + _ctc_value = _ctc[field] + _cb_ctc_value = _cb_ctc[_collection]["data"][_threshold][field] + assert _ctc_value == _cb_ctc_value, f""" + For epoch : {_ctc['fcst_valid_epoch']} + and fstLen: {_ctc['fcst_len']} + and threshold: {_threshold} + the derived CTC {field}: {_ctc_value} and caclulated CTC {field}: {_cb_ctc_value} values do not match""" + + +def test_ctc_visibiltiy_data_hrrr_ops_all_hrrr(): """ This test is a comprehensive test of the ctcBuilder data. It will retrieve CTC documents for a specific fcstValidEpoch from couchbase and calculate the CTC's for the same fcstValidEpoch. 
@@ -652,15 +610,14 @@ def test_ctc_visibiltiy_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-lo credentials_file = os.environ["CREDENTIALS"] assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf8") - yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(credentials_file).open(encoding="utf-8") as _f: + yaml_data = yaml.load(_f, yaml.SafeLoader) _host = yaml_data["cb_host"] _user = yaml_data["cb_user"] _password = yaml_data["cb_password"] _bucket = yaml_data["cb_bucket"] _collection = yaml_data["cb_collection"] _scope = yaml_data["cb_scope"] - _f.close() timeout_options = ClusterTimeoutOptions( kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) @@ -670,108 +627,104 @@ def test_ctc_visibiltiy_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-lo ) cluster = Cluster("couchbase://" + _host, options) # get available fcstValidEpochs for couchbase - try: - stmnt = f"""SELECT RAW fcstValidEpoch + + stmnt = f"""SELECT RAW fcstValidEpoch + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type="DD" + AND docType="CTC" + AND subDocType = "VISIBILITY" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}'""" + result = cluster.query(stmnt) + cb_fcst_valid_epochs = list(result) + if len(cb_fcst_valid_epochs) == 0: + pytest.fail("There is no data") + # choose the last one + fcst_valid_epoch = cb_fcst_valid_epochs[round(len(cb_fcst_valid_epochs) / 2)] + # get all the cb fcstLen values + result = cluster.query( + f"""SELECT raw fcstLen FROM `{_bucket}`.{_scope}.{_collection} - WHERE type="DD" - AND docType="CTC" + WHERE type='DD' + AND docType = "CTC" AND subDocType = "VISIBILITY" AND model='HRRR_OPS' AND region='ALL_HRRR' AND version='V01' - AND subset='{_collection}'""" - result = cluster.query(stmnt) - cb_fcst_valid_epochs = list(result) - if len(cb_fcst_valid_epochs) == 0: - assert False, "There is no data" - # choose the last one - # fcst_valid_epoch = cb_fcst_valid_epochs[-1] - fcst_valid_epoch = cb_fcst_valid_epochs[round(len(cb_fcst_valid_epochs) / 2)] - # get all the cb fcstLen values - result = cluster.query( - f"""SELECT raw fcstLen - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "VISIBILITY" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - order by fcstLen - """ - ) - cb_fcst_valid_lens = list(result) - # get the thesholdDescriptions from the couchbase metadata - # result = cluster.query( - # f""" - # SELECT RAW thresholdDescriptions.visibility - # FROM `{_bucket}`.{_scope}.{_collection} - # WHERE type="MD" - # AND docType="matsAux" - # """, - # read_only=True, - # ) - # get the associated couchbase ceiling model data - # get the associated couchbase obs - # get the ctc couchbase data - result = cluster.query( - f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "VISIBILITY" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {cb_fcst_valid_lens} - order by fcstLen; - """ - ) - cb_results = list(result) - # print the couchbase statement - print( - "cb statement is:" - + f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "CTC" - AND subDocType = "VISIBILITY" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND 
version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {cb_fcst_valid_lens} - order by fcstLen;""" - ) - for _cb_ctc in cb_results: - fcstln = _cb_ctc["METAR"]["fcstLen"] - for _threshold in _cb_ctc["METAR"]["data"].keys(): - _ctc = calculate_cb_ctc( - fcst_valid_epoch, - fcstln, - float(_threshold), - "HRRR_OPS", - _collection, - doc_sub_type="Visibility", - region="ALL_HRRR", - ) - # assert ctc values - fields = ["hits", "misses", "false_alarms", "correct_negatives"] - for field in fields: - _ctc_value = _ctc[field] - _cb_ctc_value = _cb_ctc[_collection]["data"][_threshold][field] - assert _ctc_value == _cb_ctc_value, f""" - For epoch : {_ctc['fcst_valid_epoch']} - and fstLen: {_ctc['fcst_len']} - and threshold: {_threshold} - the derived CTC {field}: {_ctc_value} and caclulated CTC {field}: {_cb_ctc_value} values do not match""" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestCTCBuilderV01 Exception failure: {_e}" - return + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + order by fcstLen + """ + ) + cb_fcst_valid_lens = list(result) + # get the thesholdDescriptions from the couchbase metadata + # result = cluster.query( + # f""" + # SELECT RAW thresholdDescriptions.visibility + # FROM `{_bucket}`.{_scope}.{_collection} + # WHERE type="MD" + # AND docType="matsAux" + # """, + # read_only=True, + # ) + # get the associated couchbase ceiling model data + # get the associated couchbase obs + # get the ctc couchbase data + result = cluster.query( + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "CTC" + AND subDocType = "VISIBILITY" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {cb_fcst_valid_lens} + order by fcstLen; + """ + ) + cb_results = list(result) + # print the couchbase statement + print( + "cb statement is:" + + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "CTC" + AND subDocType = "VISIBILITY" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {cb_fcst_valid_lens} + order by fcstLen;""" + ) + for _cb_ctc in cb_results: + fcstln = _cb_ctc["METAR"]["fcstLen"] + for _threshold in _cb_ctc["METAR"]["data"]: + _ctc = calculate_cb_ctc( + fcst_valid_epoch, + fcstln, + float(_threshold), + "HRRR_OPS", + _collection, + doc_sub_type="Visibility", + region="ALL_HRRR", + ) + # assert ctc values + fields = ["hits", "misses", "false_alarms", "correct_negatives"] + for field in fields: + _ctc_value = _ctc[field] + _cb_ctc_value = _cb_ctc[_collection]["data"][_threshold][field] + assert _ctc_value == _cb_ctc_value, f""" + For epoch : {_ctc['fcst_valid_epoch']} + and fstLen: {_ctc['fcst_len']} + and threshold: {_threshold} + the derived CTC {field}: {_ctc_value} and caclulated CTC {field}: {_cb_ctc_value} values do not match""" diff --git a/tests/vxingest/ctc_to_cb/test_unit_metar_ctc.py b/tests/vxingest/ctc_to_cb/test_unit_metar_ctc.py index b3048a2..971286c 100644 --- a/tests/vxingest/ctc_to_cb/test_unit_metar_ctc.py +++ b/tests/vxingest/ctc_to_cb/test_unit_metar_ctc.py @@ -1,39 +1,34 @@ -# pylint: disable=missing-module-docstring import os from multiprocessing import JoinableQueue +import pytest from vxingest.ctc_to_cb.run_ingest_threads import VXIngest from vxingest.ctc_to_cb.vx_ingest_manager 
import VxIngestManager def setup_ingest(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.load_spec = {} - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" - ).content_as[dict]["ingest_document_ids"] - _vx_ingest.load_spec["ingest_documents"] = {} - for _id in _vx_ingest.load_spec["ingest_document_ids"]: - _vx_ingest.load_spec["ingest_documents"][_id] = _vx_ingest.collection.get( - _id - ).content_as[dict] + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.load_spec = {} + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" + ).content_as[dict]["ingest_document_ids"] + _vx_ingest.load_spec["ingest_documents"] = {} + for _id in _vx_ingest.load_spec["ingest_document_ids"]: + _vx_ingest.load_spec["ingest_documents"][_id] = _vx_ingest.collection.get( + _id + ).content_as[dict] - # Don't pass the log queue and configuration function to the VxIngestManager - # as they aren't needed as long as `.run()` isn't called. - vx_ingest_manager = VxIngestManager( - "test", _vx_ingest.load_spec, JoinableQueue(), "/tmp", None, None - ) - assert ( - vx_ingest_manager is not None - ), "vx_ingest_manager is None and should not be" - return _vx_ingest, vx_ingest_manager - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + # Don't pass the log queue and configuration function to the VxIngestManager + # as they aren't needed as long as `.run()` isn't called. 
+ vx_ingest_manager = VxIngestManager( + "test", _vx_ingest.load_spec, JoinableQueue(), "/tmp", None, None + ) + assert vx_ingest_manager is not None, "vx_ingest_manager is None and should not be" + return _vx_ingest, vx_ingest_manager def test_cb_connect_disconnect(): @@ -47,8 +42,8 @@ def test_cb_connect_disconnect(): assert vx_ingest is not None, "vx_ingest is None" assert local_time is not None, "local_time from CB should not be None" vx_ingest_manager.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_cb_connect_disconnect Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_cb_connect_disconnect Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() @@ -61,25 +56,24 @@ def test_credentials_and_load_spec(): vx_ingest, vx_ingest_manager = setup_ingest() assert vx_ingest.load_spec["cb_connection"]["user"] == "avid" vx_ingest_manager.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_credentials_and_load_spec Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() -def test_write_load_job_to_files(): +def test_write_load_job_to_files(tmp_path): """test write the load job""" vx_ingest_manager = None try: vx_ingest, vx_ingest_manager = setup_ingest() vx_ingest.load_job_id = "test_id" - vx_ingest.output_dir = "/tmp" + vx_ingest.output_dir = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} vx_ingest.write_load_job_to_files() - os.remove("/tmp/test_id.json") - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_write_load_job_to_files Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_write_load_job_to_files Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() @@ -99,8 +93,8 @@ def test_build_load_job_doc(): ].startswith( "LJ:METAR:vxingest.ctc_to_cb.run_ingest_threads:VXIngest" ), f"load_job ID is wrong: {ljd['id']} does not start with 'LJ:METAR:ctc_to_cb.run_ingest_threads:VXIngest'" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() diff --git a/tests/vxingest/ctc_to_cb/test_unit_queries_ctc.py b/tests/vxingest/ctc_to_cb/test_unit_queries_ctc.py index cc5c078..d851c5a 100644 --- a/tests/vxingest/ctc_to_cb/test_unit_queries_ctc.py +++ b/tests/vxingest/ctc_to_cb/test_unit_queries_ctc.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import os from datetime import timedelta from pathlib import Path @@ -13,187 +12,136 @@ def connect_cb(): """ create a couchbase connection and maintain the collection and cluster objects. """ - # noinspection PyBroadException - try: - try: - cb_connection # is it defined pylint:disable=used-before-assignment - except NameError: - credentials_file = os.environ["CREDENTIALS"] - assert ( - Path(credentials_file).is_file() is True - ), f"*** credentials_file file {credentials_file} can not be found!" 
- _f = open(credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection = {} - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() - - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + + credentials_file = os.environ["CREDENTIALS"] + assert ( + Path(credentials_file).is_file() is True + ), f"*** credentials_file file {credentials_file} can not be found!" + with Path(credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) + cb_connection = {} + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] + + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster("couchbase://" + cb_connection["host"], options) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_epoch_fcstlen_model(request): - """test""" - try: - _name = request.node.name - _expected_time = 3.0 - testdata = Path( - "tests/vxingest/ctc_to_cb/testdata/test_epoch_fcstLen_model.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 3.0 + testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_epoch_fcstLen_model.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = 
result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"


 def test_epoch_fcstlen_obs(request):
-    """test"""
-    try:
-        _name = request.node.name
-        _expected_time = 0.2
-        testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_epoch_fcstLen_obs.n1ql")
-        with testdata.open(mode="r", encoding="utf-8") as file:
-            _statement = file.read()
-        result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
-        # have to read the rows before we can get to the metadata as of couchbase 4.1
-        _rows = list(result.rows())
-        elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
-        print(f"{_name}: elapsed_time is {elapsed_time}")
-        assert result is not None, "{_name}: result is None"
-        assert (
-            elapsed_time < _expected_time
-        ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}"
-    except Exception as _e:  # pylint:disable=broad-except
-        assert False, f"{_name} Exception failure: {_e}"
+    _name = request.node.name
+    _expected_time = 0.2
+    testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_epoch_fcstLen_obs.n1ql")
+    with testdata.open(mode="r", encoding="utf-8") as file:
+        _statement = file.read()
+    result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
+    # have to read the rows before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"


 def test_forecast_valid_epoch(request):
-    """test"""
-    try:
-        _name = request.node.name
-        _expected_time = 6.0
-        testdata = Path(
-            "tests/vxingest/ctc_to_cb/testdata/test_forecast_valid_epoch.n1ql"
-        )
-        with testdata.open(mode="r", encoding="utf-8") as file:
-            _statement = file.read()
-        result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
-        # have to read the rows before we can get to the metadata as of couchbase 4.1
-        _rows = list(result.rows())
-        elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
-        print(f"{_name}: elapsed_time is {elapsed_time}")
-        assert result is not None, "{_name}: result is None"
-        assert (
-            elapsed_time < _expected_time
-        ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}"
-    except Exception as _e:  # pylint:disable=broad-except
-        assert False, f"{_name} Exception failure: {_e}"
+    _name = request.node.name
+    _expected_time = 6.0
+    testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_forecast_valid_epoch.n1ql")
+    with testdata.open(mode="r", encoding="utf-8") as file:
+        _statement = file.read()
+    result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
+    # have to read the rows before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"


 def test_get_region_lat_lon(request):
-    """test"""
-    try:
-        _name = 
request.node.name
-        _expected_time = 0.01
-        testdata = Path(
-            "tests/vxingest/ctc_to_cb/testdata/test_get_region_lat_lon.n1ql"
-        )
-        with testdata.open(mode="r", encoding="utf-8") as file:
-            _statement = file.read()
-        result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
-        # have to read the rows before we can get to the metadata as of couchbase 4.1
-        _rows = list(result.rows())
-        elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
-        print(f"{_name}: elapsed_time is {elapsed_time}")
-        assert result is not None, "{_name}: result is None"
-        assert (
-            elapsed_time < _expected_time
-        ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}"
-    except Exception as _e:  # pylint:disable=broad-except
-        assert False, f"{_name} Exception failure: {_e}"
+    _name = request.node.name
+    _expected_time = 0.01
+    testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_get_region_lat_lon.n1ql")
+    with testdata.open(mode="r", encoding="utf-8") as file:
+        _statement = file.read()
+    result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
+    # have to read the rows before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"


 def test_get_stations(request):
-    """test"""
-    try:
-        _name = request.node.name
-        _expected_time = 3
-        testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_get_stations.n1ql")
-        with testdata.open(mode="r", encoding="utf-8") as file:
-            _statement = file.read()
-        result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
-        # have to read the rows before we can get to the metadata as of couchbase 4.1
-        _rows = list(result.rows())
-        elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
-        print(f"{_name}: elapsed_time is {elapsed_time}")
-        assert result is not None, "{_name}: result is None"
-        assert (
-            elapsed_time < _expected_time
-        ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}"
-    except Exception as _e:  # pylint:disable=broad-except
-        assert False, f"{_name} Exception failure: {_e}"
+    _name = request.node.name
+    _expected_time = 3
+    testdata = Path("tests/vxingest/ctc_to_cb/testdata/test_get_stations.n1ql")
+    with testdata.open(mode="r", encoding="utf-8") as file:
+        _statement = file.read()
+    result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
+    # have to read the rows before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"


 def test_get_threshold_descriptions(request):
-    """test"""
-    try:
-        _name = request.node.name
-        _expected_time = 0.01
-        testdata = Path(
-            "tests/vxingest/ctc_to_cb/testdata/test_get_threshold_descriptions.n1ql"
-        )
-        with testdata.open(mode="r", encoding="utf-8") as file:
-            _statement = file.read()
-        result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
-        # have to read the rows before we can get to the metadata as of couchbase 
4.1
-        _rows = list(result.rows())
-        elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
-        print(f"{_name}: elapsed_time is {elapsed_time}")
-        assert result is not None, "{_name}: result is None"
-        assert (
-            elapsed_time < _expected_time
-        ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}"
-    except Exception as _e:  # pylint:disable=broad-except
-        assert False, f"{_name} Exception failure: {_e}"
-
-
-# geo search for stations - currently not implemented
-# _result1 = self.load_spec["cluster"].search_query(
-#     "station_geo",
-#     GeoBoundingBoxQuery(
-#         top_left=(_boundingbox["tl_lon"], _boundingbox["tl_lat"]),
-#         bottom_right=(_boundingbox["br_lon"], _boundingbox["br_lat"]),
-#         field="geo",
-#     ),
-#     SearchOptions(fields=["name"], limit=10000),
-# )
+    _name = request.node.name
+    _expected_time = 0.01
+    testdata = Path(
+        "tests/vxingest/ctc_to_cb/testdata/test_get_threshold_descriptions.n1ql"
+    )
+    with testdata.open(mode="r", encoding="utf-8") as file:
+        _statement = file.read()
+    result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True))
+    # have to read the rows before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"
diff --git a/tests/vxingest/grib2_to_cb/test_int_metar_model_grib.py b/tests/vxingest/grib2_to_cb/test_int_metar_model_grib.py
index 231a724..7af6c0c 100644
--- a/tests/vxingest/grib2_to_cb/test_int_metar_model_grib.py
+++ b/tests/vxingest/grib2_to_cb/test_int_metar_model_grib.py
@@ -5,12 +5,10 @@
 21 196 14 000018 %y %j %H %f treating the last 6 decimals as microseconds even though they are not.
 these files are two digit year, day of year, hour, and forecast lead time (6 digit ??)
 """
-import glob
 import json
 import math
 import os
 from datetime import timedelta
-from glob import glob
 from multiprocessing import Queue
 from pathlib import Path

@@ -32,211 +30,199 @@ def connect_cb():
     """
     create a couchbase connection and maintain the collection and cluster objects.
     """
-    try:
-        if cb_connection:  # pylint: disable=used-before-assignment
-            return cb_connection
-        else:
-            credentials_file = os.environ["CREDENTIALS"]
-            assert (
-                Path(credentials_file).is_file() is True
-            ), f"*** credentials_file file {credentials_file} can not be found!"
-            _f = open(credentials_file, encoding="utf-8")
+    if cb_connection:
+        return cb_connection
+    else:
+        credentials_file = os.environ["CREDENTIALS"]
+        assert (
+            Path(credentials_file).is_file() is True
+        ), f"*** credentials_file file {credentials_file} can not be found!"
+ with Path(credentials_file).open(encoding="utf-8") as _f: _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster( + "couchbase://" + cb_connection["host"], options + ) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_grib_builder_one_thread_file_pattern_hrrr_ops_conus(tmp_path): """test gribBuilder with one thread. 
This test verifies the resulting data file against the one that is in couchbase already in order to make sure the calculations are proper.""" - try: - # 1632412800 fcst_len 1 -> 1632412800 - 1 * 3600 -> 1632409200 September 23, 2021 15:00:00 -> 2126615000001 - # 1632412800 fcst_len 3 -> 1632412800 - 3 * 3600 -> 1632402000 September 23, 2021 13:00:00 -> 2126613000003 - # 1632412800 fcst_len 15 -> 1632412800 - 15 * 3600 -> 1632358800 September 22, 2021 19:00:00 -> (missing) - # 1632412800 fcst_len 18 -> 1632412800 - 18 * 3600 -> 1632348000 September 22, 2021 22:00:00 -> 2126522000018 (missing) - # 1632420000 September 23, 2021 18:00:00 2126616000018 - # 1632423600 September 23, 2021 19:00:00 2126617000001 - # first_epoch = 1634252400 - 10 - # last_epoch = 1634252400 + 10 - credentials_file = os.environ["CREDENTIALS"] - log_queue = Queue() - vx_ingest = VXIngest() - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR", - "credentials_file": credentials_file, - "file_name_mask": "%y%j%H%f", - "output_dir": f"{tmp_path}", - "threads": 1, - "file_pattern": "21287230000[0123456789]?", - }, - log_queue, - stub_worker_log_configurer, - ) - # check the output files to see if they match the documents that were - # preveously created by the real ingest process - for _f in glob(f"{tmp_path}/*.json"): - # read in the output file - _json = None - with open(_f, encoding="utf-8") as _f: - _json = json.load(_f)[0] - _id = _json["id"] - if _id.startswith("LJ"): - for _k in _json.keys(): - assert ( - _k - in [ - "id", - "subset", - "type", - "lineageId", - "script", - "scriptVersion", - "loadSpec", - "note", - ] - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus LJ failure key {_k} not in {_json.keys()}" - continue - _statement = f"select METAR.* from `{connect_cb()['bucket']}`._default.METAR where meta().id = '{_id}'" - qresult = connect_cb()["cluster"].query(_statement) - res_rows = list(qresult.rows()) - assert ( - len(res_rows) > 0 - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure test document {_id} not found in couchbase" - result = list(qresult.rows())[0] - # assert top level fields - keys = _json.keys() - for _k in result.keys(): + # 1632412800 fcst_len 1 -> 1632412800 - 1 * 3600 -> 1632409200 September 23, 2021 15:00:00 -> 2126615000001 + # 1632412800 fcst_len 3 -> 1632412800 - 3 * 3600 -> 1632402000 September 23, 2021 13:00:00 -> 2126613000003 + # 1632412800 fcst_len 15 -> 1632412800 - 15 * 3600 -> 1632358800 September 22, 2021 19:00:00 -> (missing) + # 1632412800 fcst_len 18 -> 1632412800 - 18 * 3600 -> 1632348000 September 22, 2021 22:00:00 -> 2126522000018 (missing) + # 1632420000 September 23, 2021 18:00:00 2126616000018 + # 1632423600 September 23, 2021 19:00:00 2126617000001 + # first_epoch = 1634252400 - 10 + # last_epoch = 1634252400 + 10 + credentials_file = os.environ["CREDENTIALS"] + log_queue = Queue() + vx_ingest = VXIngest() + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR", + "credentials_file": credentials_file, + "file_name_mask": "%y%j%H%f", + "output_dir": f"{tmp_path}", + "threads": 1, + "file_pattern": "21287230000[0123456789]?", + }, + log_queue, + stub_worker_log_configurer, + ) + # check the output files to see if they match the documents that were + # preveously created by the real ingest process + for _f in tmp_path.glob("*.json"): + # read in the output file + _json = None + with _f.open(encoding="utf-8") as json_file: + _json = json.load(json_file)[0] + _id = _json["id"] + if 
_id.startswith("LJ"): + for _k in _json: assert ( - _k in keys - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure top level key {_k} not in {_json.keys()}" - # assert the units + _k + in [ + "id", + "subset", + "type", + "lineageId", + "script", + "scriptVersion", + "loadSpec", + "note", + ] + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus LJ failure key {_k} not in {_json.keys()}" + continue + _statement = f"select METAR.* from `{connect_cb()['bucket']}`._default.METAR where meta().id = '{_id}'" + qresult = connect_cb()["cluster"].query(_statement) + result_rows = list(qresult.rows()) + assert ( + len(result_rows) > 0 + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure test document {_id} not found in couchbase" + + result = result_rows[0] + # assert top level fields + keys = _json.keys() + for _k in result: + assert ( + _k in keys + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure top level key {_k} not in {_json.keys()}" + # assert the units + assert ( + result["units"] == _json["units"] + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure units {result['units']} != {_json['units']}" + # assert the data + for _k in result["data"]: assert ( - result["units"] == _json["units"] - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure units {result['units']} != {_json['units']}" - # assert the data - for _k in result["data"].keys(): + _k in _json["data"] + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data key {_k} not in {_json['data'].keys()}" + for _dk in result["data"][_k]: assert ( - _k in _json["data"].keys() - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data key {_k} not in {_json['data'].keys()}" - for _dk in result["data"][_k].keys(): + _dk in _json["data"][_k] + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data key {_k}.{_dk} not in {_json['data'][_k].keys()}" + # assert data field matches to 2 decimal places + if _dk == "name": + # string compare assert ( - _dk in _json["data"][_k].keys() - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data key {_k}.{_dk} not in {_json['data'][_k].keys()}" - # assert data field matches to 2 decimal places - if _dk == "name": - # string compare - assert ( - result["data"][_k][_dk] == _json["data"][_k][_dk] - ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure name {result['data'][_k][_dk]} != {_json['data'][_k][_dk]}" + result["data"][_k][_dk] == _json["data"][_k][_dk] + ), f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure name {result['data'][_k][_dk]} != {_json['data'][_k][_dk]}" + else: + # math compare + # print(f"result {_k} {_dk} ", result["data"][_k][_dk]) + abs_tol = 0.0 + if _dk == "Ceiling": + abs_tol = 0.002 # ceiling values don't always have four decimals of resolution + elif _dk == "DewPoint": + abs_tol = 1.0001 # DewPoint only has 3 decimals of precision from pygrib whereas cfgrib is having 4 (or at least the old ingest only had four) + # abs_tol = 0.0001 # DewPoint only has 3 decimals of precision from pygrib whereas cfgrib is having 4 (or at least the old ingest only had four) + elif ( + _dk == "RH" + ): # RH only has one decimal of resolution from the grib file + abs_tol = 1.00001 # not really dure why math.isclose compares out to 5 places but not 6 + # abs_tol = 0.00001 # not really dure why math.isclose compares out to 5 places but not 6 + # 
There are no unusual math transformations in the RH handler. else: - # math compare - # print(f"result {_k} {_dk} ", result["data"][_k][_dk]) - abs_tol = 0.0 - if _dk == "Ceiling": - abs_tol = 0.002 # ceiling values don't always have four decimals of resolution - elif _dk == "DewPoint": - abs_tol = 1.0001 # DewPoint only has 3 decimals of precision from pygrib whereas cfgrib is having 4 (or at least the old ingest only had four) - # abs_tol = 0.0001 # DewPoint only has 3 decimals of precision from pygrib whereas cfgrib is having 4 (or at least the old ingest only had four) - elif ( - _dk == "RH" - ): # RH only has one decimal of resolution from the grib file - abs_tol = 1.00001 # not really dure why math.isclose compares out to 5 places but not 6 - # abs_tol = 0.00001 # not really dure why math.isclose compares out to 5 places but not 6 - # There are no unusual math transformations in the RH handler. - else: - abs_tol = 0.0000000000001 # most fields validate between pygrib and cfgrib precisely + abs_tol = 0.0000000000001 # most fields validate between pygrib and cfgrib precisely - assert math.isclose( - result["data"][_k][_dk], - _json["data"][_k][_dk], - abs_tol=abs_tol, - ), f"""TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data not close within {abs_tol} - {_k}.{_dk} {result['data'][_k][_dk]} != {_json['data'][_k][_dk]} within {abs_tol} decimal places.""" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus Exception failure: {_e}" + assert math.isclose( + result["data"][_k][_dk], + _json["data"][_k][_dk], + abs_tol=abs_tol, + ), f"""TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus failure data not close within {abs_tol} + {_k}.{_dk} {result['data'][_k][_dk]} != {_json['data'][_k][_dk]} within {abs_tol} decimal places.""" def test_grib_builder_two_threads_file_pattern_hrrr_ops_conus(tmp_path): """test gribBuilder multi-threaded Not going to qulify the data on this one, just make sure it runs two threads properly """ - try: - # 1632412800 fcst_len 1 -> 1632412800 - 1 * 3600 -> 1632409200 September 23, 2021 15:00:00 -> 2126615000001 - # 1632412800 fcst_len 3 -> 1632412800 - 3 * 3600 -> 1632402000 September 23, 2021 13:00:00 -> 2126613000003 - # 1632412800 fcst_len 15 -> 1632412800 - 15 * 3600 -> 1632358800 September 22, 2021 19:00:00 -> (missing) - # 1632412800 fcst_len 18 -> 1632412800 - 18 * 3600 -> 1632348000 September 22, 2021 22:00:00 -> 2126522000018 (missing) - # 1632420000 September 23, 2021 18:00:00 2126616000018 - # 1632423600 September 23, 2021 19:00:00 2126617000001 - # first_epoch = 1634252400 - 10 - # last_epoch = 1634252400 + 10 - credentials_file = os.environ["CREDENTIALS"] - # remove output files - log_queue = Queue() - vx_ingest = VXIngest() - # NOTE: the input file path is defined by the job document - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR", - "credentials_file": credentials_file, - "file_name_mask": "%y%j%H%f", - "output_dir": f"{tmp_path}", - "threads": 2, - "file_pattern": "21287230000[0123456789]?", - }, - log_queue, - stub_worker_log_configurer, - ) - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus Exception failure: {_e} " + # 1632412800 fcst_len 1 -> 1632412800 - 1 * 3600 -> 1632409200 September 23, 2021 15:00:00 -> 2126615000001 + # 1632412800 fcst_len 3 -> 1632412800 - 3 * 3600 -> 1632402000 September 23, 2021 13:00:00 -> 
2126613000003 + # 1632412800 fcst_len 15 -> 1632412800 - 15 * 3600 -> 1632358800 September 22, 2021 19:00:00 -> (missing) + # 1632412800 fcst_len 18 -> 1632412800 - 18 * 3600 -> 1632348000 September 22, 2021 22:00:00 -> 2126522000018 (missing) + # 1632420000 September 23, 2021 18:00:00 2126616000018 + # 1632423600 September 23, 2021 19:00:00 2126617000001 + # first_epoch = 1634252400 - 10 + # last_epoch = 1634252400 + 10 + credentials_file = os.environ["CREDENTIALS"] + # remove output files + log_queue = Queue() + vx_ingest = VXIngest() + # NOTE: the input file path is defined by the job document + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR", + "credentials_file": credentials_file, + "file_name_mask": "%y%j%H%f", + "output_dir": f"{tmp_path}", + "threads": 2, + "file_pattern": "21287230000[0123456789]?", + }, + log_queue, + stub_worker_log_configurer, + ) def test_grib_builder_two_threads_file_pattern_rap_ops_130_conus(tmp_path): """test gribBuilder multi-threaded Not going to qulify the data on this one, just make sure it runs two threads properly """ - try: - credentials_file = os.environ["CREDENTIALS"] - # remove output files - log_queue = Queue() - vx_ingest = VXIngest() - # NOTE: the input file path is defined by the job document - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:RAP_OPS_130", - "credentials_file": credentials_file, - "file_name_mask": "%y%j%H%f", - "output_dir": f"{tmp_path}", - "threads": 2, - "file_pattern": "23332080000[0123456789]?", - }, - log_queue, - stub_worker_log_configurer, - ) - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGribBuilderV01.test_gribBuilder_one_epoch_hrrr_ops_conus Exception failure: {_e} " + credentials_file = os.environ["CREDENTIALS"] + # remove output files + log_queue = Queue() + vx_ingest = VXIngest() + # NOTE: the input file path is defined by the job document + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:GRIB2:MODEL:RAP_OPS_130", + "credentials_file": credentials_file, + "file_name_mask": "%y%j%H%f", + "output_dir": f"{tmp_path}", + "threads": 2, + "file_pattern": "23332080000[0123456789]?", + }, + log_queue, + stub_worker_log_configurer, + ) diff --git a/tests/vxingest/grib2_to_cb/test_unit_metar_model_grib.py b/tests/vxingest/grib2_to_cb/test_unit_metar_model_grib.py index 5558a11..f6c3519 100644 --- a/tests/vxingest/grib2_to_cb/test_unit_metar_model_grib.py +++ b/tests/vxingest/grib2_to_cb/test_unit_metar_model_grib.py @@ -1,41 +1,32 @@ -# pylint: disable=missing-module-docstring import os -import shutil from pathlib import Path +import pytest from vxingest.grib2_to_cb.run_ingest_threads import VXIngest def setup_connection_multiple_ingest_ids(): """test setup - used to test multiple ingest_document_ids""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" - ).content_as[dict]["ingest_document_ids"] - return _vx_ingest - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" - return None + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = 
_vx_ingest.collection.get( + "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" + ).content_as[dict]["ingest_document_ids"] + return _vx_ingest def setup_connection(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" - ).content_as[dict]["ingest_document_ids"] - return _vx_ingest - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" - return None + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" + ).content_as[dict]["ingest_document_ids"] + return _vx_ingest def test_credentials_and_load_spec(): @@ -44,8 +35,8 @@ def test_credentials_and_load_spec(): try: vx_ingest = setup_connection() assert True, vx_ingest.load_spec["cb_connection"]["user"] == "cb_user" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_credentials_and_load_spec Exception failure: {_e}") finally: vx_ingest.close_cb() @@ -56,8 +47,8 @@ def test_credentials_and_load_spec_multiple_ingest_ids(): try: vx_ingest = setup_connection_multiple_ingest_ids() assert True, vx_ingest.load_spec["cb_connection"]["user"] == "cb_user" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_credentials_and_load_spec Exception failure: {_e}") finally: vx_ingest.close_cb() @@ -71,65 +62,61 @@ def test_cb_connect_disconnect(): local_time = [list(result)[0]] assert True, local_time is not None vx_ingest.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_cb_connect_disconnect Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_cb_connect_disconnect Exception failure: {_e}") finally: vx_ingest.close_cb() -def test_write_load_job_to_files(): +def test_write_load_job_to_files(tmp_path): """test write the load job""" vx_ingest = None try: vx_ingest = setup_connection() vx_ingest.load_job_id = "test_id" - vx_ingest.output_dir = "/tmp" + vx_ingest.output_dir = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} vx_ingest.write_load_job_to_files() - os.remove("/tmp/test_id.json") - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_write_load_job_to_files Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_write_load_job_to_files Exception failure: {_e}") finally: vx_ingest.close_cb() -def test_build_load_job_doc(): +def test_build_load_job_doc(tmp_path): """test the build load job""" vx_ingest = None try: vx_ingest = setup_connection() vx_ingest.load_job_id = "test_id" - vx_ingest.path = "/tmp" + vx_ingest.path = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} lineage = "CTC" ljd = vx_ingest.build_load_job_doc(lineage) assert True, ljd["id"].startswith( "LJ:METAR:vxingest.grib2_to_cb.run_ingest_threads:VXIngest" ) - except Exception as _e: # pylint:disable=broad-except - 
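These load-job tests now take pytest's built-in tmp_path fixture instead of writing into a hard-coded /tmp location, which gives every test its own empty directory and removes the need for manual cleanup such as os.remove. A minimal sketch of the fixture in use (file name and payload are illustrative):

    import json

    def test_writes_one_json_file(tmp_path):
        # tmp_path is a pathlib.Path to a unique, empty directory for this test.
        out_file = tmp_path / "test_id.json"
        out_file.write_text(json.dumps({"test": "a line of text"}), encoding="utf-8")

        assert out_file.is_file()
        assert json.loads(out_file.read_text(encoding="utf-8"))["test"] == "a line of text"
        # No explicit cleanup: pytest prunes old temporary directories itself.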
assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: vx_ingest.close_cb() -def test_vxingest_get_file_list(): +def test_vxingest_get_file_list(tmp_path): """test the vxingest get_file_list""" vx_ingest = None try: vx_ingest = setup_connection() vx_ingest.load_job_id = "test_id" - if os.path.exists("/tmp/test"): - shutil.rmtree("/tmp/test") - os.mkdir("/tmp/test") # order is important to see if the files are getting returned sorted by mtime - Path("/tmp/test/f_fred_01").touch() - Path("/tmp/test/f_fred_02").touch() - Path("/tmp/test/f_fred_04").touch() - Path("/tmp/test/f_fred_05").touch() - Path("/tmp/test/f_fred_03").touch() - Path("/tmp/test/f_1_fred_01").touch() - Path("/tmp/test/f_2_fred_01").touch() - Path("/tmp/test/f_3_fred_01").touch() + Path(tmp_path / "f_fred_01").touch() + Path(tmp_path / "f_fred_02").touch() + Path(tmp_path / "f_fred_04").touch() + Path(tmp_path / "f_fred_05").touch() + Path(tmp_path / "f_fred_03").touch() + Path(tmp_path / "f_1_fred_01").touch() + Path(tmp_path / "f_2_fred_01").touch() + Path(tmp_path / "f_3_fred_01").touch() query = f""" SELECT url, mtime From `{vx_ingest.cb_credentials['bucket']}`.{vx_ingest.cb_credentials['scope']}.{vx_ingest.cb_credentials['collection']} WHERE @@ -138,16 +125,15 @@ def test_vxingest_get_file_list(): AND fileType='grib2' AND originType='model' AND model='HRRR_OPS' order by url;""" - files = vx_ingest.get_file_list(query, "/tmp/test", "f_fred_*") + files = vx_ingest.get_file_list(query, tmp_path, "f_fred_*") assert True, files == [ - "/tmp/test/f_fred_01", - "/tmp/test/f_fred_02", - "/tmp/test/f_fred_04", - "/tmp/test/f_fred_05", - "/tmp/test/f_fred_03", + tmp_path / "f_fred_01", + tmp_path / "f_fred_02", + tmp_path / "f_fred_04", + tmp_path / "f_fred_05", + tmp_path / "f_fred_03", ] - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: - shutil.rmtree("/tmp/test") vx_ingest.close_cb() diff --git a/tests/vxingest/grib2_to_cb/test_unit_proj.py b/tests/vxingest/grib2_to_cb/test_unit_proj.py index 8b1e0c7..daa822a 100644 --- a/tests/vxingest/grib2_to_cb/test_unit_proj.py +++ b/tests/vxingest/grib2_to_cb/test_unit_proj.py @@ -1,25 +1,21 @@ -# pylint: disable=missing-module-docstring import os import pyproj +import pytest import xarray as xr from vxingest.grib2_to_cb.run_ingest_threads import VXIngest def setup_connection(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" - ).content_as[dict]["ingest_document_ids"] - return _vx_ingest - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" - return None + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB-TEST:V01:METAR:GRIB2:MODEL:HRRR" + ).content_as[dict]["ingest_document_ids"] + return _vx_ingest def test_proj(): @@ -65,7 +61,7 @@ def 
test_proj(): init_transformer = pyproj.Transformer.from_proj( proj_from=latlon_proj, proj_to=init_projection ) - _x, _y = init_transformer.transform( # pylint: disable=unpacking-non-sequence + _x, _y = init_transformer.transform( lon_0, lat_0, radians=False ) # the lower left coordinates in the projection space @@ -116,7 +112,6 @@ def test_proj(): lon = row["lon"] if lat == -90 and lon == 180: continue # don't know how to transform that station - # pylint: disable=unpacking-non-sequence ( _x, _y, @@ -130,7 +125,7 @@ def test_proj(): y_gridpoint == 587.461349077341 ), "y_gridpoint is not 587.461349077341" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_proj Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_proj Exception failure: {_e}") finally: vx_ingest.close_cb() diff --git a/tests/vxingest/grib2_to_cb/test_unit_proj_nocb.py b/tests/vxingest/grib2_to_cb/test_unit_proj_nocb.py index cfdf4bc..7c3183c 100644 --- a/tests/vxingest/grib2_to_cb/test_unit_proj_nocb.py +++ b/tests/vxingest/grib2_to_cb/test_unit_proj_nocb.py @@ -1,96 +1,91 @@ -# pylint: disable=missing-module-docstring import pyproj import xarray as xr def test_proj_nocb(): """test the proj""" - try: - ds_height_above_ground_2m = xr.open_dataset( - "/opt/data/grib2_to_cb/hrrr_ops/input_files/2120013000018", - engine="cfgrib", - backend_kwargs={ - "filter_by_keys": { - "typeOfLevel": "heightAboveGround", - "stepType": "instant", - "level": 2, - }, - "read_keys": ["projString"], - "indexpath": "", + ds_height_above_ground_2m = xr.open_dataset( + "/opt/data/grib2_to_cb/hrrr_ops/input_files/2120013000018", + engine="cfgrib", + backend_kwargs={ + "filter_by_keys": { + "typeOfLevel": "heightAboveGround", + "stepType": "instant", + "level": 2, }, - ) - in_proj = pyproj.Proj(proj="latlon") - proj_string = ds_height_above_ground_2m.r2.attrs["GRIB_projString"] - max_x = ds_height_above_ground_2m.r2.attrs["GRIB_Nx"] - max_y = ds_height_above_ground_2m.r2.attrs["GRIB_Ny"] - spacing = ds_height_above_ground_2m.r2.attrs["GRIB_DxInMetres"] - latitude_of_first_grid_point_in_degrees = ds_height_above_ground_2m.r2.attrs[ - "GRIB_latitudeOfFirstGridPointInDegrees" - ] - longitude_of_first_grid_point_in_degrees = ds_height_above_ground_2m.r2.attrs[ - "GRIB_longitudeOfFirstGridPointInDegrees" - ] - proj_params = {} - for _v in proj_string.replace(" ", "").split("+")[1:]: - elem = _v.split("=") - proj_params[elem[0]] = elem[1] + "read_keys": ["projString"], + "indexpath": "", + }, + ) + in_proj = pyproj.Proj(proj="latlon") + proj_string = ds_height_above_ground_2m.r2.attrs["GRIB_projString"] + max_x = ds_height_above_ground_2m.r2.attrs["GRIB_Nx"] + max_y = ds_height_above_ground_2m.r2.attrs["GRIB_Ny"] + spacing = ds_height_above_ground_2m.r2.attrs["GRIB_DxInMetres"] + latitude_of_first_grid_point_in_degrees = ds_height_above_ground_2m.r2.attrs[ + "GRIB_latitudeOfFirstGridPointInDegrees" + ] + longitude_of_first_grid_point_in_degrees = ds_height_above_ground_2m.r2.attrs[ + "GRIB_longitudeOfFirstGridPointInDegrees" + ] + proj_params = {} + for _v in proj_string.replace(" ", "").split("+")[1:]: + elem = _v.split("=") + proj_params[elem[0]] = elem[1] - in_proj = pyproj.Proj(proj="latlon") - init_projection = pyproj.Proj(proj_params) - latlon_proj = pyproj.Proj(proj="latlon") - lat_0 = latitude_of_first_grid_point_in_degrees - lon_0 = longitude_of_first_grid_point_in_degrees + in_proj = pyproj.Proj(proj="latlon") + init_projection = pyproj.Proj(proj_params) + latlon_proj = pyproj.Proj(proj="latlon") + 
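The projection tests read the GRIB projString attribute, split it into keyword parameters, and build a pyproj.Proj from the resulting dict. A condensed, self-contained sketch of that flow; the proj string and 3 km spacing below are HRRR-like sample inputs, whereas the real tests derive them from the GRIB file's attributes:

    import pyproj

    proj_string = "+proj=lcc +lat_0=38.5 +lon_0=262.5 +lat_1=38.5 +lat_2=38.5 +R=6371229 +units=m"
    spacing = 3000.0  # grid spacing in metres

    # Split "+key=value" tokens into a parameter dict, as the test does.
    proj_params = dict(token.split("=") for token in proj_string.replace(" ", "").split("+")[1:])

    latlon_proj = pyproj.Proj(proj="latlon")
    grid_proj = pyproj.Proj(proj_params)
    transformer = pyproj.Transformer.from_proj(proj_from=latlon_proj, proj_to=grid_proj)

    # A station's lon/lat in projection metres, then in grid points.
    x_metres, y_metres = transformer.transform(-104.66999, 39.86, radians=False)
    x_gridpoint, y_gridpoint = x_metres / spacing, y_metres / spacing

The tests additionally shift the projection origin (x_0/y_0) to the lower-left corner of the grid before dividing by the spacing, which is why the asserted grid-point values differ from what this bare sketch would produce.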
lat_0 = latitude_of_first_grid_point_in_degrees + lon_0 = longitude_of_first_grid_point_in_degrees - init_transformer = pyproj.Transformer.from_proj( - proj_from=latlon_proj, proj_to=init_projection - ) - _x, _y = init_transformer.transform( # pylint: disable=unpacking-non-sequence - lon_0, lat_0, radians=False - ) # the lower left coordinates in the projection space + init_transformer = pyproj.Transformer.from_proj( + proj_from=latlon_proj, proj_to=init_projection + ) + _x, _y = init_transformer.transform( + lon_0, lat_0, radians=False + ) # the lower left coordinates in the projection space - # Add the proper conversion to 'fool' Proj into setting 0,0 in the lower left corner of the domain - # NOTE: It doesn't actually do this, but it will be necessary to find x,y coordinates relative to the lower left corne - proj_params["x_0"] = abs(_x) - # offset the x,y points in the projection so that we get points oriented to bottm left - proj_params["y_0"] = abs(_y) - # Create Proj object - out_proj = pyproj.Proj(proj_params) + # Add the proper conversion to 'fool' Proj into setting 0,0 in the lower left corner of the domain + # NOTE: It doesn't actually do this, but it will be necessary to find x,y coordinates relative to the lower left corne + proj_params["x_0"] = abs(_x) + # offset the x,y points in the projection so that we get points oriented to bottm left + proj_params["y_0"] = abs(_y) + # Create Proj object + out_proj = pyproj.Proj(proj_params) - transformer = pyproj.Transformer.from_proj(proj_from=in_proj, proj_to=out_proj) - print( - "in_proj", - in_proj, - "out_proj", - out_proj, - "max_x", - max_x, - "max_y", - max_y, - "spacing", - spacing, - ) - assert ( - in_proj.definition_string() - == "proj=longlat datum=WGS84 no_defs ellps=WGS84 towgs84=0,0,0" - ), "in_proj definition_string is not 'proj=longlat datum=WGS84 no_defs ellps=WGS84 towgs84=0,0,0'" - assert ( - out_proj.definition_string() - == "proj=lcc lat_0=38.5 lon_0=262.5 lat_1=38.5 lat_2=38.5 x_0=2697520.14252193 y_0=1587306.15255666 R=6371229 units=m no_defs" - ), "out_proj is not 'proj=lcc lat_0=38.5 lon_0=262.5 lat_1=38.5 lat_2=38.5 x_0=2697520.14252193 y_0=1587306.15255666 R=6371229 units=m no_defs'" - assert max_x == 1799, "max_x is not 1799" - assert max_y == 1059, "max_y is not 1059" - assert spacing == 3000.0, "spacing is not 3000.0" + transformer = pyproj.Transformer.from_proj(proj_from=in_proj, proj_to=out_proj) + print( + "in_proj", + in_proj, + "out_proj", + out_proj, + "max_x", + max_x, + "max_y", + max_y, + "spacing", + spacing, + ) + assert ( + in_proj.definition_string() + == "proj=longlat datum=WGS84 no_defs ellps=WGS84 towgs84=0,0,0" + ), "in_proj definition_string is not 'proj=longlat datum=WGS84 no_defs ellps=WGS84 towgs84=0,0,0'" + assert ( + out_proj.definition_string() + == "proj=lcc lat_0=38.5 lon_0=262.5 lat_1=38.5 lat_2=38.5 x_0=2697520.14252193 y_0=1587306.15255666 R=6371229 units=m no_defs" + ), "out_proj is not 'proj=lcc lat_0=38.5 lon_0=262.5 lat_1=38.5 lat_2=38.5 x_0=2697520.14252193 y_0=1587306.15255666 R=6371229 units=m no_defs'" + assert max_x == 1799, "max_x is not 1799" + assert max_y == 1059, "max_y is not 1059" + assert spacing == 3000.0, "spacing is not 3000.0" - lat = 39.86 - lon = -104.66999 - # pylint: disable=unpacking-non-sequence - ( - _x, - _y, - ) = transformer.transform(lon, lat, radians=False) - x_gridpoint = _x / spacing - y_gridpoint = _y / spacing - assert x_gridpoint == 695.3172101518072, "x_gridpoint is not 695.3172101518072" - assert y_gridpoint == 587.461349077341, 
"y_gridpoint is not 587.461349077341" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_proj Exception failure: {_e}" + lat = 39.86 + lon = -104.66999 + ( + _x, + _y, + ) = transformer.transform(lon, lat, radians=False) + x_gridpoint = _x / spacing + y_gridpoint = _y / spacing + assert x_gridpoint == 695.3172101518072, "x_gridpoint is not 695.3172101518072" + assert y_gridpoint == 587.461349077341, "y_gridpoint is not 587.461349077341" diff --git a/tests/vxingest/grib2_to_cb/test_unit_queries_grib.py b/tests/vxingest/grib2_to_cb/test_unit_queries_grib.py index af1ceb2..156b3df 100644 --- a/tests/vxingest/grib2_to_cb/test_unit_queries_grib.py +++ b/tests/vxingest/grib2_to_cb/test_unit_queries_grib.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import os from datetime import timedelta from pathlib import Path @@ -13,152 +12,121 @@ def connect_cb(): """ create a couchbase connection and maintain the collection and cluster objects. """ - # noinspection PyBroadException - try: - try: - cb_connection # is it defined pylint: disable=used-before-assignment - except NameError: - credentials_file = os.environ["CREDENTIALS"] - assert ( - Path(credentials_file).is_file() is True - ), f"*** credentials_file file {credentials_file} can not be found!" - _f = open(credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection = {} - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + credentials_file = os.environ["CREDENTIALS"] + assert ( + Path(credentials_file).is_file() is True + ), f"*** credentials_file file {credentials_file} can not be found!" 
+ with Path(credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) + cb_connection = {} + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] + + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster("couchbase://" + cb_connection["host"], options) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_ingest_document_id(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.02 - testdata = Path( - "tests/vxingest/grib2_to_cb/testdata/test_ingest_document_id.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.02 + testdata = Path("tests/vxingest/grib2_to_cb/testdata/test_ingest_document_id.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_ingest_document_fields(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.01 - testdata = Path( - "tests/vxingest/grib2_to_cb/testdata/test_ingest_document_fields.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.01 + testdata = Path( + 
"tests/vxingest/grib2_to_cb/testdata/test_ingest_document_fields.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_df(request): - """test""" - try: - _name = request.node.name - _expected_time = 18 - testdata = Path("tests/vxingest/grib2_to_cb/testdata/test_get_DF.n1ql") - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 18 + testdata = Path("tests/vxingest/grib2_to_cb/testdata/test_get_DF.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_stations(request): - """test""" - try: - _name = request.node.name - _expected_time = 1 - testdata = Path("tests/vxingest/grib2_to_cb/testdata/test_get_stations.n1ql") - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 1 + testdata = Path("tests/vxingest/grib2_to_cb/testdata/test_get_stations.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + 
print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_model_by_fcst_valid_epoch(request): - """test""" - try: - _name = request.node.name - _expected_time = 1.2 - testdata = Path( - "tests/vxingest/grib2_to_cb/testdata/test_get_model_by_fcstValidEpoch.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 1.2 + testdata = Path( + "tests/vxingest/grib2_to_cb/testdata/test_get_model_by_fcstValidEpoch.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" diff --git a/tests/vxingest/netcdf_to_cb/delta_hist.py b/tests/vxingest/netcdf_to_cb/delta_hist.py index 9ff4f53..e54f190 100644 --- a/tests/vxingest/netcdf_to_cb/delta_hist.py +++ b/tests/vxingest/netcdf_to_cb/delta_hist.py @@ -11,6 +11,7 @@ """ import argparse import sys +from pathlib import Path import plotly.express as px @@ -41,21 +42,20 @@ def runit(self, args): try: datasets = {} unitset = {} - f = open(self.delta_file) - lines = f.readlines() + with Path(self.delta_file).open(encoding="utf-8") as f: + lines = f.readlines() for x in lines: if x.startswith("var"): columns = x.split() field = columns[2] delta = columns[5] units = columns[6] - if field not in datasets.keys(): + if field not in datasets: datasets[field] = [] if delta == "None": continue datasets[field].append(float(delta)) unitset[field] = units - f.close() keys = datasets.keys() for field in keys: fig = px.histogram( @@ -65,8 +65,8 @@ def runit(self, args): labels={"x": unitset[field]}, ) fig.show() - except: - print("*** Error in HistBuilder ***" + str(sys.exc_info())) + except Exception as e: + print(f"*** Error in HistBuilder *** - {e}") def main(self): args = parse_args(sys.argv[1:]) diff --git a/tests/vxingest/netcdf_to_cb/test_int_metar_obs_netcdf.py b/tests/vxingest/netcdf_to_cb/test_int_metar_obs_netcdf.py index f532e47..7c83e82 100644 --- a/tests/vxingest/netcdf_to_cb/test_int_metar_obs_netcdf.py +++ b/tests/vxingest/netcdf_to_cb/test_int_metar_obs_netcdf.py @@ -16,150 +16,136 @@ """ import json import os -from glob import glob from multiprocessing import Queue +from pathlib import Path +import pytest from vxingest.netcdf_to_cb.run_ingest_threads import VXIngest -def 
stub_worker_log_configurer(queue: Queue): # pylint:disable=unused-argument +def stub_worker_log_configurer(queue: Queue): """A stub to replace log_config.worker_log_configurer""" - pass # pylint:disable=unnecessary-pass + pass def setup_connection(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = (os.environ["CREDENTIALS"],) - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:NETCDF:OBS" - ).content_as[dict]["ingest_document_ids"] - return _vx_ingest - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" - return None - - -def ordered(obj): - """Utliity function to sort a dictionary so that it can be compared to another dictionary""" - if isinstance(obj, dict): - return sorted((k, ordered(v)) for k, v in obj.items()) - if isinstance(obj, list): - return sorted(ordered(x) for x in obj) - else: - return obj - - -def test_one_thread_specify_file_pattern(tmp_path): # pylint:disable=missing-function-docstring - try: - log_queue = Queue() - vx_ingest = VXIngest() - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", - "credentials_file": os.environ["CREDENTIALS"], - "file_name_mask": "%Y%m%d_%H%M", - "output_dir": f"{tmp_path}", - "threads": 1, - "file_pattern": "20211108_0000", - }, - log_queue, - stub_worker_log_configurer, - ) - assert ( - len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) > 0 - ), "There are no output files" - - assert ( - len( - glob( - f"{tmp_path}/LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" - ) - ) - == 1 - ), "there is no load job output file" - - # use file globbing to see if we got one output file for each input file plus one load job file - assert len(glob(f"{tmp_path}/20211108*.json")) == len( - glob("/opt/data/netcdf_to_cb/input_files/20211108_0000") - ), "number of output files is incorrect" - derived_data = json.load( - open(f"{tmp_path}/20211108_0000.json", encoding="utf-8") - ) - station_id = "" - derived_station = {} - obs_id = "" - derived_obs = {} - for item in derived_data: - if item["docType"] == "station" and item["name"] == "KDEN": - station_id = item["id"] - derived_station = item - else: - if item["docType"] == "obs": - obs_id = item["id"] - derived_obs = item - if derived_station and derived_obs: - break - retrieved_station = vx_ingest.collection.get(station_id).content_as[dict] - retrieved_obs = vx_ingest.collection.get(obs_id).content_as[dict] - # make sure the updateTime is the same in both the derived and retrieved station - retrieved_station["updateTime"] = derived_station["updateTime"] - # make sure the firstTime and lastTime are the same in both the derived and retrieved station['geo'] - retrieved_station["geo"][0]["firstTime"] = derived_station["geo"][0][ - "firstTime" - ] - retrieved_station["geo"][0]["lastTime"] = derived_station["geo"][0]["lastTime"] - assert ordered(derived_station) == ordered( - retrieved_station - ), "derived station does not match retrieved station" - assert ordered(derived_obs) == ordered( - retrieved_obs - ), "derived obs does not match retrieved obs" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = (os.environ["CREDENTIALS"],) + _vx_ingest.cb_credentials = 
_vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB-TEST:V01:METAR:NETCDF:OBS" + ).content_as[dict]["ingest_document_ids"] + return _vx_ingest + + +def assert_dicts_almost_equal(dict1, dict2, rel_tol=1e-09): + """Utility function to compare potentially nested dictionaries containing floats""" + assert set(dict1.keys()) == set( + dict2.keys() + ), "Dictionaries do not have the same keys" + for key in dict1: + if isinstance(dict1[key], dict): + assert_dicts_almost_equal(dict1[key], dict2[key], rel_tol) + else: + assert dict1[key] == pytest.approx( + dict2[key], rel=rel_tol + ), f"Values for {key} do not match" + + +def test_one_thread_specify_file_pattern(tmp_path): + log_queue = Queue() + vx_ingest = VXIngest() + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", + "credentials_file": os.environ["CREDENTIALS"], + "file_name_mask": "%Y%m%d_%H%M", + "output_dir": f"{tmp_path}", + "threads": 1, + "file_pattern": "20211108_0000", + }, + log_queue, + stub_worker_log_configurer, + ) + + # Test that we have one or more output files + output_file_list = list(tmp_path.glob("[0123456789]???????_[0123456789]???.json")) + assert len(output_file_list) > 0, "There are no output files" + + # Test that we have one "load job" ("LJ") document + lj_doc_regex = "LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" + num_load_job_files = len(list(tmp_path.glob(lj_doc_regex))) + assert num_load_job_files == 1, "there is no load job output file" + + # Test that we have one output file per input file + input_path = Path("/opt/data/netcdf_to_cb/input_files") + num_input_files = len(list(input_path.glob("20211108_0000"))) + num_output_files = len(list(tmp_path.glob("20211108*.json"))) + assert num_output_files == num_input_files, "number of output files is incorrect" + + # Test that the output file matches the content in the database + derived_data = json.load((tmp_path / "20211108_0000.json").open(encoding="utf-8")) + station_id = "" + derived_station = {} + obs_id = "" + derived_obs = {} + for item in derived_data: + if item["docType"] == "station" and item["name"] == "KDEN": + station_id = item["id"] + derived_station = item + else: + if item["docType"] == "obs": + obs_id = item["id"] + derived_obs = item + if derived_station and derived_obs: + break + retrieved_station = vx_ingest.collection.get(station_id).content_as[dict] + retrieved_obs = vx_ingest.collection.get(obs_id).content_as[dict] + # make sure the updateTime is the same in both the derived and retrieved station + retrieved_station["updateTime"] = derived_station["updateTime"] + # make sure the firstTime and lastTime are the same in both the derived and retrieved station['geo'] + retrieved_station["geo"][0]["firstTime"] = derived_station["geo"][0]["firstTime"] + retrieved_station["geo"][0]["lastTime"] = derived_station["geo"][0]["lastTime"] + + assert derived_station == retrieved_station + + assert_dicts_almost_equal(derived_obs, retrieved_obs) def test_two_threads_spedicfy_file_pattern(tmp_path): """ integration test for testing multithreaded capability """ - try: - log_queue = Queue() - vx_ingest = VXIngest() - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", - "credentials_file": os.environ["CREDENTIALS"], - "file_name_mask": "%Y%m%d_%H%M", - "output_dir": f"{tmp_path}", - "threads": 2, - "file_pattern": "20211105*", - }, - log_queue, - stub_worker_log_configurer, - ) - assert ( - 
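The new assert_dicts_almost_equal helper above replaces the old ordered() deep-sort comparison so that float values retrieved from Couchbase may differ from the derived values by rounding without failing the test; non-dict values go through pytest.approx with a relative tolerance. A tiny usage sketch (the helper is restated from the hunk above so the snippet stands alone; the data is made up):

    import pytest

    def assert_dicts_almost_equal(dict1, dict2, rel_tol=1e-09):
        assert set(dict1.keys()) == set(dict2.keys()), "Dictionaries do not have the same keys"
        for key in dict1:
            if isinstance(dict1[key], dict):
                assert_dicts_almost_equal(dict1[key], dict2[key], rel_tol)
            else:
                assert dict1[key] == pytest.approx(dict2[key], rel=rel_tol), f"Values for {key} do not match"

    derived = {"data": {"KDEN": {"Temperature": 263.1500000000001}}}
    retrieved = {"data": {"KDEN": {"Temperature": 263.15}}}
    assert_dicts_almost_equal(derived, retrieved)  # passes: the difference is within rel_tol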
len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) > 0 - ), "There are no output files" - - assert ( - len( - glob( - f"{tmp_path}/LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" - ) - ) - == 1 - ), "there is no load job output file" - - # use file globbing to see if we got one output file for each input file plus one load job file - assert len(glob(f"{tmp_path}/20211105*.json")) == len( - glob("/opt/data/netcdf_to_cb/input_files/20211105*") - ), "number of output files is incorrect" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + log_queue = Queue() + vx_ingest = VXIngest() + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", + "credentials_file": os.environ["CREDENTIALS"], + "file_name_mask": "%Y%m%d_%H%M", + "output_dir": f"{tmp_path}", + "threads": 2, + "file_pattern": "20211105*", + }, + log_queue, + stub_worker_log_configurer, + ) + assert ( + len(list(tmp_path.glob("[0123456789]???????_[0123456789]???.json"))) > 0 + ), "There are no output files" + + lj_doc_regex = "LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" + assert ( + len(list(tmp_path.glob(lj_doc_regex))) == 1 + ), "there is no load job output file" + + # use file globbing to see if we got one output file for each input file plus one load job file + input_path = Path("/opt/data/netcdf_to_cb/input_files") + assert len(list(tmp_path.glob("20211105*.json"))) == len( + list(input_path.glob("20211105*")) + ), "number of output files is incorrect" def test_one_thread_default(tmp_path): @@ -168,43 +154,34 @@ def test_one_thread_default(tmp_path): TIP: you might want to use local credentials to a local couchbase. If you do you will need to run the scripts in the matsmetadata directory to load the local metadata. """ - try: - log_queue = Queue() - vx_ingest = VXIngest() - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", - "credentials_file": os.environ["CREDENTIALS"], - "file_name_mask": "%Y%m%d_%H%M", - "output_dir": f"{tmp_path}", - "file_pattern": "[0123456789]???????_[0123456789]???", - "threads": 1, - }, - log_queue, - stub_worker_log_configurer, - ) - assert ( - len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) > 0 - ), "There are no output files" - - assert ( - len( - glob( - f"{tmp_path}/LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" - ) - ) - >= 1 - ), "there is no load job output file" - - # use file globbing to see if we got one output file for each input file plus one load job file - assert len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) == len( - glob( - "/opt/data/netcdf_to_cb/input_files/[0123456789]???????_[0123456789]???" 
- ) - ), "number of output files is incorrect" - - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + log_queue = Queue() + vx_ingest = VXIngest() + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", + "credentials_file": os.environ["CREDENTIALS"], + "file_name_mask": "%Y%m%d_%H%M", + "output_dir": f"{tmp_path}", + "file_pattern": "[0123456789]???????_[0123456789]???", + "threads": 1, + }, + log_queue, + stub_worker_log_configurer, + ) + assert ( + len(list(tmp_path.glob("[0123456789]???????_[0123456789]???.json"))) > 0 + ), "There are no output files" + + lj_doc_regex = "LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" + assert ( + len(list(tmp_path.glob(lj_doc_regex))) >= 1 + ), "there is no load job output file" + + # use file globbing to see if we got one output file for each input file plus one load job file + input_path = Path("/opt/data/netcdf_to_cb/input_files") + assert len(list(tmp_path.glob("[0123456789]???????_[0123456789]???.json"))) == len( + list(input_path.glob("[0123456789]???????_[0123456789]???")) + ), "number of output files is incorrect" def test_two_threads_default(tmp_path): @@ -213,62 +190,30 @@ def test_two_threads_default(tmp_path): TIP: you might want to use local credentials to a local couchbase. If you do you will need to run the scripts in the matsmetadata directory to load the local metadata. """ - try: - log_queue = Queue() - vx_ingest = VXIngest() - vx_ingest.runit( - { - "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", - "credentials_file": os.environ["CREDENTIALS"], - "file_name_mask": "%Y%m%d_%H%M", - "output_dir": f"{tmp_path}", - "threads": 2, - }, - log_queue, - stub_worker_log_configurer, - ) - assert ( - len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) > 0 - ), "There are no output files" - - assert ( - len( - glob( - f"{tmp_path}/LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" - ) - ) - >= 1 - ), "there is no load job output file" - - # use file globbing to see if we got one output file for each input file plus one load job file - assert len(glob(f"{tmp_path}/[0123456789]???????_[0123456789]???.json")) == len( - glob( - "/opt/data/netcdf_to_cb/input_files/[0123456789]???????_[0123456789]???" - ) - ), "number of output files is incorrect" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" - - -def check_mismatched_fcst_valid_epoch_to_id(): - """This is a simple ultility test that can be used to see if there are - any missmatched fcstValidEpoch values among the observations i.e. 
the fcstValidEpoch in the id - does not match the fcstValidEpoch in the top level fcstValidEpoch field""" - try: - vx_ingest = setup_connection() - cluster = vx_ingest.cluster - result = cluster.query( - f""" - select METAR.fcstValidEpoch, METAR.id - FROM `{vx_ingest.cb_credentials['bucket']}`.{vx_ingest.cb_credentials['scope']}.{vx_ingest.cb_credentials['collection']} - WHERE - docType = "obs" - AND subset = "METAR" - AND type = "DD" - AND version = "V01" - AND NOT CONTAINS(id,to_string(fcstValidEpoch)) """ - ) - for row in result: - assert False, f"These do not have the same fcstValidEpoch: {str(row['fcstValidEpoch']) + row['id']}" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + log_queue = Queue() + vx_ingest = VXIngest() + vx_ingest.runit( + { + "job_id": "JOB-TEST:V01:METAR:NETCDF:OBS", + "credentials_file": os.environ["CREDENTIALS"], + "file_name_mask": "%Y%m%d_%H%M", + "output_dir": f"{tmp_path}", + "threads": 2, + }, + log_queue, + stub_worker_log_configurer, + ) + assert ( + len(list(tmp_path.glob("[0123456789]???????_[0123456789]???.json"))) > 0 + ), "There are no output files" + + lj_doc_regex = "LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest:*.json" + assert ( + len(list(tmp_path.glob(lj_doc_regex))) >= 1 + ), "there is no load job output file" + + # use file globbing to see if we got one output file for each input file plus one load job file + input_path = Path("/opt/data/netcdf_to_cb/input_files") + assert len(list(tmp_path.glob("[0123456789]???????_[0123456789]???.json"))) == len( + list(input_path.glob("[0123456789]???????_[0123456789]???")) + ), "number of output files is incorrect" diff --git a/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py b/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py index a6bf9ae..2f0aa92 100644 --- a/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py +++ b/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py @@ -1,6 +1,4 @@ -# pylint:disable=missing-module-docstring import os -import shutil import time from copy import deepcopy from datetime import datetime @@ -8,6 +6,7 @@ import netCDF4 as nc import numpy as np +import pytest from couchbase.mutation_state import MutationState from couchbase.n1ql import QueryScanConsistency from couchbase.options import QueryOptions @@ -21,18 +20,14 @@ def setup_connection(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB:V01:METAR:NETCDF:OBS" - ).content_as[dict]["ingest_document_ids"] - return _vx_ingest - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" - return None + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB:V01:METAR:NETCDF:OBS" + ).content_as[dict]["ingest_document_ids"] + return _vx_ingest def test_credentials_and_load_spec(): @@ -40,8 +35,8 @@ def test_credentials_and_load_spec(): try: vx_ingest = setup_connection() assert vx_ingest.load_spec["cb_connection"]["user"], "cb_user" - except Exception as _e: # pylint:disable=broad-except - assert 
False, f"test_credentials_and_load_spec Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_credentials_and_load_spec Exception failure: {_e}") finally: vx_ingest.close_cb() @@ -54,41 +49,40 @@ def test_cb_connect_disconnect(): local_time = [list(result)[0]] assert local_time is not None vx_ingest.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_cb_connect_disconnect Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_cb_connect_disconnect Exception failure: {_e}") finally: vx_ingest.close_cb() -def test_write_load_job_to_files(): +def test_write_load_job_to_files(tmp_path): """test write the load job""" try: vx_ingest = setup_connection() vx_ingest.load_job_id = "test_id" - vx_ingest.output_dir = "/tmp" + vx_ingest.output_dir = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} vx_ingest.write_load_job_to_files() - assert Path("/tmp/test_id.json").is_file() - os.remove("/tmp/test_id.json") - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_write_load_job_to_files Exception failure: {_e}" + assert Path(tmp_path / "test_id.json").is_file() + except Exception as _e: + pytest.fail(f"test_write_load_job_to_files Exception failure: {_e}") finally: vx_ingest.close_cb() -def test_build_load_job_doc(): +def test_build_load_job_doc(tmp_path): """test the build load job""" try: vx_ingest = setup_connection() vx_ingest.load_job_id = "test_id" - vx_ingest.path = "/tmp" + vx_ingest.path = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} ljd = vx_ingest.build_load_job_doc(vx_ingest.path) assert ljd["id"].startswith( "LJ:METAR:vxingest.netcdf_to_cb.run_ingest_threads:VXIngest" ) - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: vx_ingest.close_cb() @@ -100,13 +94,13 @@ def test_umask_value_transform(): """ try: # first we have to create a netcdf dataset and a temperature variable - _nc = nc.Dataset( # pylint:disable=no-member + _nc = nc.Dataset( "inmemory.nc", format="NETCDF3_CLASSIC", mode="w", memory=1028, fill_value=3.402823e38, - ) # pylint:disable=no-member + ) _d = _nc.createDimension("recNum", None) _v = _nc.createVariable("temperature", float, ("recNum")) _v.units = "kelvin" @@ -129,14 +123,14 @@ def test_umask_value_transform(): # call the handler temp = builder.umask_value_transform(params_dict) assert temp == 250.15 - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: vx_ingest.close_cb() _nc.close() # close returns memoryview -def test_vxingest_get_file_list(): +def test_vxingest_get_file_list(tmp_path): """test the vxingest get_file_list""" try: vx_ingest = setup_connection() @@ -153,21 +147,18 @@ def test_vxingest_get_file_list(): "projection": "lambert_conformal_conic", "subset": "metar", "type": "DF", - "url": "/tmp/test/f_fred_01", + "url": str(tmp_path / "f_fred_01"), } vx_ingest.collection.upsert("DF:metar:grib2:HRRR_OPS:f_fred_01", df_record) - if os.path.exists("/tmp/test"): - shutil.rmtree("/tmp/test") - os.mkdir("/tmp/test") # order is important to see if the files are getting returned sorted by mtime - Path("/tmp/test/f_fred_01").touch() - Path("/tmp/test/f_fred_02").touch() - 
Path("/tmp/test/f_fred_04").touch() - Path("/tmp/test/f_fred_05").touch() - Path("/tmp/test/f_fred_03").touch() - Path("/tmp/test/f_1_fred_01").touch() - Path("/tmp/test/f_2_fred_01").touch() - Path("/tmp/test/f_3_fred_01").touch() + Path(tmp_path / "f_fred_01").touch() + Path(tmp_path / "f_fred_02").touch() + Path(tmp_path / "f_fred_04").touch() + Path(tmp_path / "f_fred_05").touch() + Path(tmp_path / "f_fred_03").touch() + Path(tmp_path / "f_1_fred_01").touch() + Path(tmp_path / "f_2_fred_01").touch() + Path(tmp_path / "f_3_fred_01").touch() query = f""" SELECT url, mtime From `{vx_ingest.cb_credentials['bucket']}`.{vx_ingest.cb_credentials['scope']}.{vx_ingest.cb_credentials['collection']} WHERE @@ -177,16 +168,16 @@ def test_vxingest_get_file_list(): AND originType='model' AND model='HRRR_OPS' order by url;""" # should get f_fred_01 because the mtime in the DF record is old. The file is newer. - files = vx_ingest.get_file_list(query, "/tmp/test", "f_fred_*") + files = vx_ingest.get_file_list(query, tmp_path, "f_fred_*") assert set(files) == set( [ - "/tmp/test/f_fred_01", - "/tmp/test/f_fred_02", - "/tmp/test/f_fred_04", - "/tmp/test/f_fred_05", - "/tmp/test/f_fred_03", + str(tmp_path / "f_fred_01"), + str(tmp_path / "f_fred_02"), + str(tmp_path / "f_fred_04"), + str(tmp_path / "f_fred_05"), + str(tmp_path / "f_fred_03"), ] - ), "get_file_list wrong list" + ), "get_file_list 1 wrong list" # update the mtime in the df record so that the file will not be included df_record["mtime"] = round(time.time()) vx_ingest.collection.upsert("DF:metar:grib2:HRRR_OPS:f_fred_01", df_record) @@ -195,125 +186,116 @@ def test_vxingest_get_file_list(): vx_ingest.cluster.query( query, QueryOptions(scan_consistency=QueryScanConsistency.REQUEST_PLUS) ) - files = vx_ingest.get_file_list(query, "/tmp/test", "f_fred_*") + files = vx_ingest.get_file_list(query, tmp_path, "f_fred_*") # should not get f_fred_01 because the DF record has a newer mtime assert set(files) == set( [ - "/tmp/test/f_fred_02", - "/tmp/test/f_fred_04", - "/tmp/test/f_fred_05", - "/tmp/test/f_fred_03", + str(tmp_path / "f_fred_02"), + str(tmp_path / "f_fred_04"), + str(tmp_path / "f_fred_05"), + str(tmp_path / "f_fred_03"), ] - ), "get_file_list wrong list" + ), "get_file_list 2 wrong list" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: vx_ingest.collection.remove("DF:metar:grib2:HRRR_OPS:f_fred_01") - shutil.rmtree("/tmp/test") vx_ingest.close_cb() def test_interpolate_time(): """test the interpolate time routine in netcdf_builder""" - try: - vx_ingest = setup_connection() - _cluster = vx_ingest.cluster - _collection = vx_ingest.collection - _load_spec = vx_ingest.load_spec - _ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] - _ingest_document = _collection.get(_ingest_document_ids[0]).content_as[dict] - _builder = NetcdfMetarObsBuilderV01(_load_spec, _ingest_document) - for delta in [ - 0, - -1, - 1, - -1799, - 1799, - -1800, - 1800, - -1801, - 1801, - -3599, - 3599, - -3600, - 3600, - -3601, - 3601, - ]: - _t = np.array([1636390800 + delta]) - _t.view(np.ma.MaskedArray) - t_interpolated = _builder.interpolate_time({"timeObs": _t}) - print( - "for an offset: " - + str(delta) - + " results in interpolation: " - + str(t_interpolated) - ) - if delta >= -1800 and delta <= 1799: - assert ( - 1636390800 == t_interpolated - ), f"1636390800 interpolated to 
{t_interpolated} is not equal" - if delta <= -1801: - assert ( - 1636390800 - 3600 == t_interpolated - ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" - if delta >= 1800: - assert ( - 1636390800 + 3600 == t_interpolated - ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_interpolate_time Exception failure: {_e}" + vx_ingest = setup_connection() + _cluster = vx_ingest.cluster + _collection = vx_ingest.collection + _load_spec = vx_ingest.load_spec + _ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] + _ingest_document = _collection.get(_ingest_document_ids[0]).content_as[dict] + _builder = NetcdfMetarObsBuilderV01(_load_spec, _ingest_document) + for delta in [ + 0, + -1, + 1, + -1799, + 1799, + -1800, + 1800, + -1801, + 1801, + -3599, + 3599, + -3600, + 3600, + -3601, + 3601, + ]: + _t = np.array([1636390800 + delta]) + _t.view(np.ma.MaskedArray) + t_interpolated = _builder.interpolate_time({"timeObs": _t}) + print( + "for an offset: " + + str(delta) + + " results in interpolation: " + + str(t_interpolated) + ) + if delta >= -1800 and delta <= 1799: + assert ( + t_interpolated == 1636390800 + ), f"1636390800 interpolated to {t_interpolated} is not equal" + if delta <= -1801: + assert ( + 1636390800 - 3600 == t_interpolated + ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" + if delta >= 1800: + assert ( + 1636390800 + 3600 == t_interpolated + ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" def test_interpolate_time_iso(): """test the interpolate time routine in netcdf_builder""" - try: - vx_ingest = setup_connection() - _cluster = vx_ingest.cluster - _collection = vx_ingest.collection - load_spec = vx_ingest.load_spec - ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] - ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] - _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) - for delta in [ - 0, - -1, - 1, - -1799, - 1799, - -1800, - 1800, - -1801, - 1801, - -3599, - 3599, - -3600, - 3600, - -3601, - 3601, - ]: - _t = np.array([1636390800 + delta]) - _t.view(np.ma.MaskedArray) - t_interpolated = _builder.interpolate_time_iso({"timeObs": _t}) - if delta >= -1800 and delta <= 1799: - assert ( - (datetime.utcfromtimestamp(1636390800).isoformat()) - == t_interpolated - ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" - if delta <= -1801: - assert ( - (datetime.utcfromtimestamp(1636390800 - 3600).isoformat()) - == t_interpolated - ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" - if delta >= 1800: - assert ( - datetime.utcfromtimestamp(1636390800 + 3600).isoformat() - == t_interpolated - ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" - - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_interpolate_time_iso Exception failure: {_e}" + vx_ingest = setup_connection() + _cluster = vx_ingest.cluster + _collection = vx_ingest.collection + load_spec = vx_ingest.load_spec + ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] + ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] + _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) + for delta in [ + 0, + -1, + 1, + -1799, + 1799, + -1800, + 1800, + -1801, + 1801, + -3599, + 3599, + -3600, + 3600, + -3601, + 3601, + ]: + _t = np.array([1636390800 + delta]) + 
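The expectations in test_interpolate_time encode rounding an observation time to the nearest hour: offsets from -1800 s up to +1799 s stay in the same hour, +1800 s rounds up, and -1801 s rounds down. Purely as a reading aid, a tiny reference implementation of that behaviour (the builder's real method operates on the masked timeObs array and may differ in detail):

    def round_to_nearest_hour(epoch_seconds: int) -> int:
        # Half-up rounding to a 3600 s boundary.
        return ((epoch_seconds + 1800) // 3600) * 3600

    base = 1636390800
    assert round_to_nearest_hour(base + 1799) == base
    assert round_to_nearest_hour(base + 1800) == base + 3600
    assert round_to_nearest_hour(base - 1800) == base
    assert round_to_nearest_hour(base - 1801) == base - 3600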
_t.view(np.ma.MaskedArray) + t_interpolated = _builder.interpolate_time_iso({"timeObs": _t}) + if delta >= -1800 and delta <= 1799: + assert ( + (datetime.utcfromtimestamp(1636390800).isoformat()) == t_interpolated + ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" + if delta <= -1801: + assert ( + (datetime.utcfromtimestamp(1636390800 - 3600).isoformat()) + == t_interpolated + ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" + if delta >= 1800: + assert ( + datetime.utcfromtimestamp(1636390800 + 3600).isoformat() + == t_interpolated + ), f"{1636390800 - delta} interpolated to {t_interpolated} is not equal" def test_handle_station(): @@ -339,18 +321,13 @@ def test_handle_station(): _pattern = "%Y%m%d_%H%M" # fmask is usually set in the run_ingest_threads _builder.load_spec["fmask"] = _pattern - _builder.ncdf_data_set = nc.Dataset( # pylint:disable=no-member + _builder.ncdf_data_set = nc.Dataset( "/opt/data/netcdf_to_cb/input_files/20211108_0000" ) rec_num_length = _builder.ncdf_data_set["stationName"].shape[0] # find the rec_num of the stationName ZBAA for i in range(rec_num_length): - if ( - str( - nc.chartostring(_builder.ncdf_data_set["stationName"][i]) # pylint: disable=no-member - ) # pylint: disable=no-member - == "ZBAA" - ): # pylint:disable=no-member + if str(nc.chartostring(_builder.ncdf_data_set["stationName"][i])) == "ZBAA": break _rec_num = i # use a station that is in the netcdf file but is not used in any of our domains. @@ -395,10 +372,7 @@ def test_handle_station(): QueryOptions(scan_consistency=QueryScanConsistency.REQUEST_PLUS), ) result_list = list(result) - if len(result_list) > 0: - station_zbaa = result_list[0] - else: - station_zbaa = None + station_zbaa = result_list[0] if len(result_list) > 0 else None # keep a copy of station_zbaa around for future use station_zbaa_copy = deepcopy(station_zbaa) if station_zbaa_copy is not None: @@ -529,8 +503,8 @@ def test_handle_station(): new_station_zbaa["geo"][0]["firstTime"] = orig_first_time assert_station(_cluster, new_station_zbaa, _builder) cleanup_builder_doc(_cluster, _collection, _builder, station_zbaa_copy) - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_handle_station Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_handle_station Exception failure: {_e}") finally: # upsert the original ZBAA station document station_zbaa["geo"].pop(0) @@ -634,7 +608,7 @@ def assert_station(cluster, station_zbaa, builder): assert new_station_zbaa["name"], "new station name is missing" assert new_station_zbaa["updateTime"], "new station updateTime is missing" assert new_station_zbaa["geo"], "new station geo is missing" - assert 1 == len(new_station_zbaa["geo"]), "new station geo is not length 1" + assert len(new_station_zbaa["geo"]) == 1, "new station geo is not length 1" return assert new_station_zbaa["description"] == station_zbaa["description"], ( "new 'description'" @@ -704,43 +678,33 @@ def assert_station(cluster, station_zbaa, builder): def test_derive_valid_time_epoch(): """test the derive_valid_time_epoch routine in netcdf_builder""" - try: - vx_ingest = setup_connection() - _collection = vx_ingest.collection - load_spec = vx_ingest.load_spec - ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] - ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] - _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) - _builder.file_name = "20211108_0000" - _pattern = "%Y%m%d_%H%M" - 
_file_utc_time = datetime.strptime(_builder.file_name, _pattern) - expected_epoch = (_file_utc_time - datetime(1970, 1, 1)).total_seconds() - derived_epoch = _builder.derive_valid_time_epoch( - {"file_name_pattern": _pattern} - ) - assert ( - expected_epoch == derived_epoch - ), f"derived epoch {derived_epoch} is not equal to 1636329600" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_derive_valid_time_epoch Exception failure: {_e}" + vx_ingest = setup_connection() + _collection = vx_ingest.collection + load_spec = vx_ingest.load_spec + ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] + ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] + _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) + _builder.file_name = "20211108_0000" + _pattern = "%Y%m%d_%H%M" + _file_utc_time = datetime.strptime(_builder.file_name, _pattern) + expected_epoch = (_file_utc_time - datetime(1970, 1, 1)).total_seconds() + derived_epoch = _builder.derive_valid_time_epoch({"file_name_pattern": _pattern}) + assert ( + expected_epoch == derived_epoch + ), f"derived epoch {derived_epoch} is not equal to 1636329600" def test_derive_valid_time_iso(): """test the derive_valid_time_iso routine in netcdf_builder""" - try: - vx_ingest = setup_connection() - _cluster = vx_ingest.cluster - _collection = vx_ingest.collection - load_spec = vx_ingest.load_spec - ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] - ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] - _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) - _builder.file_name = "20211108_0000" - derived_epoch = _builder.derive_valid_time_iso( - {"file_name_pattern": "%Y%m%d_%H%M"} - ) - assert ( - "2021-11-08T00:00:00Z" == derived_epoch - ), f"derived epoch {derived_epoch} is not equal to 1636390800" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_derive_valid_time_epoch Exception failure: {_e}" + vx_ingest = setup_connection() + _cluster = vx_ingest.cluster + _collection = vx_ingest.collection + load_spec = vx_ingest.load_spec + ingest_document_ids = vx_ingest.load_spec["ingest_document_ids"] + ingest_document = _collection.get(ingest_document_ids[0]).content_as[dict] + _builder = NetcdfMetarObsBuilderV01(load_spec, ingest_document) + _builder.file_name = "20211108_0000" + derived_epoch = _builder.derive_valid_time_iso({"file_name_pattern": "%Y%m%d_%H%M"}) + assert ( + derived_epoch == "2021-11-08T00:00:00Z" + ), f"derived epoch {derived_epoch} is not equal to 1636390800" diff --git a/tests/vxingest/netcdf_to_cb/test_unit_queries_obs.py b/tests/vxingest/netcdf_to_cb/test_unit_queries_obs.py index 947de63..0350af3 100644 --- a/tests/vxingest/netcdf_to_cb/test_unit_queries_obs.py +++ b/tests/vxingest/netcdf_to_cb/test_unit_queries_obs.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import os from datetime import timedelta from pathlib import Path @@ -13,133 +12,105 @@ def connect_cb(): """ create a couchbase connection and maintain the collection and cluster objects. """ - # noinspection PyBroadException - try: - try: - cb_connection # pylint: disable = used-before-assignment - except NameError: - credentials_file = os.environ["CREDENTIALS"] - assert ( - Path(credentials_file).is_file() is True - ), f"*** credentials_file file {credentials_file} can not be found!" 
- _f = open(credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection = {} - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() + credentials_file = os.environ["CREDENTIALS"] + assert ( + Path(credentials_file).is_file() is True + ), f"*** credentials_file file {credentials_file} can not be found!" + with Path(credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) + cb_connection = {} + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster("couchbase://" + cb_connection["host"], options) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_ingest_document_id(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.005 - testdata = Path( - "tests/vxingest/netcdf_to_cb/testdata/test_ingest_document_id.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - metadata = result.metadata() - metrics = metadata.metrics() - elapsed_time = metrics.elapsed_time() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time.total_seconds() < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.005 + testdata = Path("tests/vxingest/netcdf_to_cb/testdata/test_ingest_document_id.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = 
list(result.rows()) + metadata = result.metadata() + metrics = metadata.metrics() + elapsed_time = metrics.elapsed_time() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time.total_seconds() < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_ingest_document_fields(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.005 - testdata = Path( - "tests/vxingest/netcdf_to_cb/testdata/test_ingest_document_fields.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.005 + testdata = Path( + "tests/vxingest/netcdf_to_cb/testdata/test_ingest_document_fields.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_stations(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.01 - testdata = Path("tests/vxingest/netcdf_to_cb/testdata/test_get_stations.n1ql") - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.01 + testdata = Path("tests/vxingest/netcdf_to_cb/testdata/test_get_stations.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} 
{elapsed_time}" def test_get_obs_by_fcst_valid_epoch(request): - """test""" - try: - _name = request.node.name - _expected_time = 1 - testdata = Path( - "tests/vxingest/netcdf_to_cb/testdata/test_get_obs_by_fcstValidEpoch.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 1 + testdata = Path( + "tests/vxingest/netcdf_to_cb/testdata/test_get_obs_by_fcstValidEpoch.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" diff --git a/tests/vxingest/partial_sums_to_cb/test_int_metar_partial_sums.py b/tests/vxingest/partial_sums_to_cb/test_int_metar_partial_sums.py index 6f07e13..75ca22a 100644 --- a/tests/vxingest/partial_sums_to_cb/test_int_metar_partial_sums.py +++ b/tests/vxingest/partial_sums_to_cb/test_int_metar_partial_sums.py @@ -1,8 +1,6 @@ -# pylint: disable=too-many-lines """ _test for VxIngest SUMS builders """ -import glob import json import os import time @@ -32,58 +30,54 @@ stations = [] -def stub_worker_log_configurer(queue: Queue): # pylint: disable=unused-argument +def stub_worker_log_configurer(queue: Queue): """A stub to replace log_config.worker_log_configurer""" - pass # pylint: disable=unnecessary-pass + pass def test_check_fcst_valid_epoch_fcst_valid_iso(): """ integration test to check fcst_valid_epoch is derived correctly """ - try: - credentials_file = os.environ["CREDENTIALS"] - assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf-8") + credentials_file = os.environ["CREDENTIALS"] + assert Path(credentials_file).is_file(), "credentials_file Does not exist" + with Path(credentials_file).open(encoding="utf-8") as _f: yaml_data = yaml.load(_f, yaml.SafeLoader) - _host = yaml_data["cb_host"] - _user = yaml_data["cb_user"] - _password = yaml_data["cb_password"] - _bucket = yaml_data["cb_bucket"] - _collection = yaml_data["cb_collection"] - _scope = yaml_data["cb_scope"] - _f.close() + _host = yaml_data["cb_host"] + _user = yaml_data["cb_user"] + _password = yaml_data["cb_password"] + _bucket = yaml_data["cb_bucket"] + _collection = yaml_data["cb_collection"] + _scope = yaml_data["cb_scope"] - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(_user, _password), timeout_options=timeout_options - 
) - cluster = Cluster("couchbase://" + _host, options) - options = ClusterOptions(PasswordAuthenticator(_user, _password)) - cluster = Cluster("couchbase://" + _host, options) - stmnt = f"""SELECT m0.fcstValidEpoch fve, fcstValidISO fvi - FROM `{_bucket}`.{_scope}.{_collection} m0 - WHERE - m0.type='DD' - AND m0.docType='PARTAILSUMS' - AND m0.subset='{_collection}' - AND m0.version='V01' - AND m0.model='HRRR_OPS' - AND m0.region='ALL_HRRR' - """ - result = cluster.query(stmnt) - for row in result: - fve = row["fve"] - utc_time = datetime.strptime(row["fvi"], "%Y-%m-%dT%H:%M:%S") - epoch_time = int((utc_time - datetime(1970, 1, 1)).total_seconds()) - assert ( - fve == epoch_time - ), "fcstValidEpoch and fcstValidIso are not the same time" - assert (fve % 3600) == 0, "fcstValidEpoch is not at top of hour" - except Exception as _e: # pylint: disable=broad-except, disable=broad-except - assert False, f"TestGsdIngestManager.test_check_fcstValidEpoch_fcstValidIso Exception failure: {_e}" + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(_user, _password), timeout_options=timeout_options + ) + cluster = Cluster("couchbase://" + _host, options) + options = ClusterOptions(PasswordAuthenticator(_user, _password)) + cluster = Cluster("couchbase://" + _host, options) + stmnt = f"""SELECT m0.fcstValidEpoch fve, fcstValidISO fvi + FROM `{_bucket}`.{_scope}.{_collection} m0 + WHERE + m0.type='DD' + AND m0.docType='PARTAILSUMS' + AND m0.subset='{_collection}' + AND m0.version='V01' + AND m0.model='HRRR_OPS' + AND m0.region='ALL_HRRR' + """ + result = cluster.query(stmnt) + for row in result: + fve = row["fve"] + utc_time = datetime.strptime(row["fvi"], "%Y-%m-%dT%H:%M:%S") + epoch_time = int((utc_time - datetime(1970, 1, 1)).total_seconds()) + assert ( + fve == epoch_time + ), "fcstValidEpoch and fcstValidIso are not the same time" + assert (fve % 3600) == 0, "fcstValidEpoch is not at top of hour" def test_get_stations_geo_search(): @@ -91,93 +85,89 @@ def test_get_stations_geo_search(): Currently we know that there are differences between the geo search stations list and the legacy stations list. This test does show those differences. The assertion is commented out. 
""" - try: - credentials_file = os.environ["CREDENTIALS"] - assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf-8") + credentials_file = os.environ["CREDENTIALS"] + assert Path(credentials_file).is_file(), "credentials_file Does not exist" + with Path(credentials_file).open(encoding="utf-8") as _f: yaml_data = yaml.load(_f, yaml.SafeLoader) - _host = yaml_data["cb_host"] - _user = yaml_data["cb_user"] - _password = yaml_data["cb_password"] - _bucket = yaml_data["cb_bucket"] - _collection = yaml_data["cb_collection"] - _scope = yaml_data["cb_scope"] - _f.close() + _host = yaml_data["cb_host"] + _user = yaml_data["cb_user"] + _password = yaml_data["cb_password"] + _bucket = yaml_data["cb_bucket"] + _collection = yaml_data["cb_collection"] + _scope = yaml_data["cb_scope"] - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(_user, _password), timeout_options=timeout_options + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(_user, _password), timeout_options=timeout_options + ) + cluster = Cluster("couchbase://" + _host, options) + collection = cluster.bucket(_bucket).scope(_scope).collection(_collection) + load_spec = {} + load_spec["cluster"] = cluster + load_spec["collection"] = collection + load_spec["ingest_document_ids"] = [ + f"MD:V01:{_collection}:HRRR_OPS:ALL_HRRR:SUMS:SURFACE:ingest" + ] + # get the ingest document id. + ingest_document_result = collection.get( + f"MD-TEST:V01:{_collection}:HRRR_OPS:ALL_HRRR:SUMS:SURFACE:ingest" + ) + ingest_document = ingest_document_result.content_as[dict] + # instantiate a partialsumsBuilder so we can use its get_station methods + builder_class = partial_sums_builder.PartialSumsSurfaceModelObsBuilderV01 + builder = builder_class(load_spec, ingest_document) + # usually these would get assigned in build_document + builder.bucket = _bucket + builder.scope = _scope + builder.collection = _collection + builder.subset = _collection + + result = cluster.query( + f""" + SELECT name, + geo.bottom_right.lat AS br_lat, + geo.bottom_right.lon AS br_lon, + geo.top_left.lat AS tl_lat, + geo.top_left.lon AS tl_lon + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='MD' + AND docType='region' + AND subset='COMMON' + AND version='V01' + """ + ) + for row in result: + # use the builder geosearch to get the station list - just use current epoch + stations = sorted( + # builder.get_stations_for_region_by_geosearch(row["name"],round(time.time())) + builder.get_stations_for_region_by_sort(row["name"], round(time.time())) ) - cluster = Cluster("couchbase://" + _host, options) - collection = cluster.bucket(_bucket).scope(_scope).collection(_collection) - load_spec = {} - load_spec["cluster"] = cluster - load_spec["collection"] = collection - load_spec["ingest_document_ids"] = [ - f"MD:V01:{_collection}:HRRR_OPS:ALL_HRRR:SUMS:SURFACE:ingest" + # get the legacy station list from the test document (this came from mysql) + # classic_station_id = "MD-TEST:V01:CLASSIC_STATIONS:" + row["name"] + # doc = collection.get(classic_station_id.strip()) + # classic_stations = sorted(doc.content_as[dict]["stations"]) + classic_stations = builder.get_legacy_stations_for_region(row["name"]) + stations_difference = [ + i + for i in classic_stations + stations + if i not in 
classic_stations or i not in stations ] - # get the ingest document id. - ingest_document_result = collection.get( - f"MD-TEST:V01:{_collection}:HRRR_OPS:ALL_HRRR:SUMS:SURFACE:ingest" - ) - ingest_document = ingest_document_result.content_as[dict] - # instantiate a partialsumsBuilder so we can use its get_station methods - builder_class = partial_sums_builder.PartialSumsSurfaceModelObsBuilderV01 - builder = builder_class(load_spec, ingest_document) - # usually these would get assigned in build_document - builder.bucket = _bucket - builder.scope = _scope - builder.collection = _collection - builder.subset = _collection - - result = cluster.query( - f""" - SELECT name, - geo.bottom_right.lat AS br_lat, - geo.bottom_right.lon AS br_lon, - geo.top_left.lat AS tl_lat, - geo.top_left.lon AS tl_lon - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='MD' - AND docType='region' - AND subset='COMMON' - AND version='V01' - """ + print( + "region " + + row["name"] + + "difference length is " + + str(len(stations_difference)) + + " stations symmetric_difference is " + + str(stations_difference) ) - for row in result: - # use the builder geosearch to get the station list - just use current epoch - stations = sorted( # pylint: disable=redefined-outer-name - # builder.get_stations_for_region_by_geosearch(row["name"],round(time.time())) - builder.get_stations_for_region_by_sort(row["name"], round(time.time())) - ) - # get the legacy station list from the test document (this came from mysql) - # classic_station_id = "MD-TEST:V01:CLASSIC_STATIONS:" + row["name"] - # doc = collection.get(classic_station_id.strip()) - # classic_stations = sorted(doc.content_as[dict]["stations"]) - classic_stations = builder.get_legacy_stations_for_region(row["name"]) - stations_difference = [ - i - for i in classic_stations + stations - if i not in classic_stations or i not in stations - ] - print( - "region " - + row["name"] - + "difference length is " - + str(len(stations_difference)) - + " stations symmetric_difference is " - + str(stations_difference) - ) - assert ( - len(stations_difference) < 1000 - ), "difference between expected and actual greater than 100" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestGsdIngestManager Exception failure: {_e}" + assert ( + len(stations_difference) < 1000 + ), "difference between expected and actual greater than 100" -def test_ps_builder_surface_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals +def test_ps_builder_surface_hrrr_ops_all_hrrr(): """ This test verifies that data is returned for each fcstLen. It can be used to debug the builder by putting a specific epoch for first_epoch. @@ -187,84 +177,72 @@ def test_ps_builder_surface_hrrr_ops_all_hrrr(): # pylint: disable=too-many-loc It calculates the Partial using couchbase data for input. Then the couchbase SUMS fcstValidEpochs are compared and asserted against the derived SUMS. 
""" - # noinspection PyBroadException - global cb_model_obs_data # pylint: disable=global-variable-not-assigned - global stations # pylint: disable=global-variable-not-assigned - try: - credentials_file = os.environ["CREDENTIALS"] - job_id = "JOB-TEST:V01:METAR:SUMS:SURFACE:MODEL:OPS" - outdir = "/opt/data/test/partial_sums_to_cb/hrrr_ops/sums/output" - if not os.path.exists(outdir): - # Create a new directory because it does not exist - os.makedirs(outdir) - filepaths = outdir + "/*.json" - files = glob.glob(filepaths) - for _f in files: - try: - os.remove(_f) - except OSError as _e: - assert False, f"Error: {_e}" - log_queue = Queue() - vx_ingest = VXIngest() - # These SUM's might already have been ingested in which case this won't do anything. - vx_ingest.runit( - { - "job_id": job_id, - "credentials_file": credentials_file, - "output_dir": outdir, - "threads": 1, - "first_epoch": 1638489600, - "last_epoch": 1638496800, - }, - log_queue, - stub_worker_log_configurer, - ) + global cb_model_obs_data + global stations + + credentials_file = os.environ["CREDENTIALS"] + job_id = "JOB-TEST:V01:METAR:SUMS:SURFACE:MODEL:OPS" + outdir = Path("/opt/data/test/partial_sums_to_cb/hrrr_ops/sums/output") + if not outdir.exists(): + # Create a new directory because it does not exist + outdir.mkdir(parents=True) + files = outdir.glob("*.json") + for _f in files: + Path(_f).unlink() + log_queue = Queue() + vx_ingest = VXIngest() + # These SUM's might already have been ingested in which case this won't do anything. + vx_ingest.runit( + { + "job_id": job_id, + "credentials_file": credentials_file, + "output_dir": str(outdir), + "threads": 1, + "first_epoch": 1638489600, + "last_epoch": 1638496800, + }, + log_queue, + stub_worker_log_configurer, + ) - list_of_output_files = glob.glob(outdir + "/*") - # latest_output_file = max(list_of_output_files, key=os.path.getctime) - latest_output_file = min(list_of_output_files, key=os.path.getctime) - try: - # Opening JSON file - output_file = open(latest_output_file, encoding="utf8") - # returns JSON object as a dictionary - vx_ingest_output_data = json.load(output_file) - # if this is an LJ document then the SUMS's were already ingested - # and the test should stop here - if vx_ingest_output_data[0]["type"] == "LJ": - return - # get the last fcstValidEpochs - fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data} - # take a fcstValidEpoch in the middle of the list - fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)] - # get all the documents that have the chosen fcstValidEpoch - docs = [ - _doc - for _doc in vx_ingest_output_data - if _doc["fcstValidEpoch"] == fcst_valid_epoch - ] - # get all the fcstLens for those docs - fcst_lens = [] - for _elem in docs: - fcst_lens.append(_elem["fcstLen"]) - output_file.close() - except Exception as _e: # pylint: disable=broad-except - assert ( - False - ), f"TestPartialSumsBuilderV01 Exception failure opening output: {_e}" - for _i in fcst_lens: - _elem = None - # find the document for this fcst_len - for _elem in docs: - if _elem["fcstLen"] == _i: - break - assert _elem is not None, "fcstLen not found in output" - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestPartialSumsBuilderV01 Exception failure: {_e}" + list_of_output_files = outdir.glob("*") + # latest_output_file = max(list_of_output_files, key=os.path.getctime) + latest_output_file = min(list_of_output_files, key=os.path.getctime) + # Opening JSON file + with 
Path(latest_output_file).open(encoding="utf-8") as output_file: + # returns JSON object as a dictionary + vx_ingest_output_data = json.load(output_file) + # if this is an LJ document then the SUMS's were already ingested + # and the test should stop here + if vx_ingest_output_data[0]["type"] == "LJ": + return + # get the last fcstValidEpochs + fcst_valid_epochs = {doc["fcstValidEpoch"] for doc in vx_ingest_output_data} + # take a fcstValidEpoch in the middle of the list + fcst_valid_epoch = list(fcst_valid_epochs)[int(len(fcst_valid_epochs) / 2)] + # get all the documents that have the chosen fcstValidEpoch + docs = [ + _doc + for _doc in vx_ingest_output_data + if _doc["fcstValidEpoch"] == fcst_valid_epoch + ] + # get all the fcstLens for those docs + fcst_lens = [] + for _elem in docs: + fcst_lens.append(_elem["fcstLen"]) -def test_ps_surface_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals - # noinspection PyBroadException + for _i in fcst_lens: + _elem = None + # find the document for this fcst_len + for _elem in docs: + if _elem["fcstLen"] == _i: + break + assert _elem is not None, "fcstLen not found in output" + + +def test_ps_surface_data_hrrr_ops_all_hrrr(): """ This test is a comprehensive test of the partialSumsBuilder data. It will retrieve SUMS documents for a specific fcstValidEpoch from couchbase and calculate the SUM's for the same fcstValidEpoch. @@ -275,15 +253,14 @@ def test_ps_surface_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals credentials_file = os.environ["CREDENTIALS"] assert Path(credentials_file).is_file(), "credentials_file Does not exist" - _f = open(credentials_file, encoding="utf8") - yaml_data = yaml.load(_f, yaml.SafeLoader) + with Path(credentials_file).open(encoding="utf-8") as _f: + yaml_data = yaml.load(_f, yaml.SafeLoader) _host = yaml_data["cb_host"] _user = yaml_data["cb_user"] _password = yaml_data["cb_password"] _bucket = yaml_data["cb_bucket"] _collection = yaml_data["cb_collection"] _scope = yaml_data["cb_scope"] - _f.close() timeout_options = ClusterTimeoutOptions( kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) @@ -293,80 +270,77 @@ def test_ps_surface_data_hrrr_ops_all_hrrr(): # pylint: disable=too-many-locals ) cluster = Cluster("couchbase://" + _host, options) # get available fcstValidEpochs for couchbase - try: - result = cluster.query( - f"""SELECT RAW fcstValidEpoch - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type="DD" - AND docType="SUMS" - AND subDocType = "SURFACE" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}'""" - ) - ps_fcst_valid_epochs = list(result) - # if len(ps_fcst_valid_epochs) == 0: - # assert False, "There is no data" - # choose the last one - fcst_valid_epoch = [] - if len(ps_fcst_valid_epochs) > 0: - fcst_valid_epoch = ps_fcst_valid_epochs[-1] - # get all the cb fcstLen values - result = cluster.query( - f"""SELECT raw fcstLen - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "SUMS" - AND subDocType = "SURFACE" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - order by fcstLen - """ - ) - ps_fcst_valid_lens = list(result) - # get the associated couchbase model data - # get the associated couchbase obs - # get the SUMS couchbase data - result = cluster.query( - f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "SUMS" - AND subDocType = "SURFACE" - AND model='HRRR_OPS' 
- AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {ps_fcst_valid_lens} - order by fcstLen; - """ - ) - cb_results = list(result) - # print the couchbase statement - print( - "cb statement is:" - + f""" - SELECT * - FROM `{_bucket}`.{_scope}.{_collection} - WHERE type='DD' - AND docType = "SUMS" - AND subDocType = "SURFACE" - AND model='HRRR_OPS' - AND region='ALL_HRRR' - AND version='V01' - AND subset='{_collection}' - AND fcstValidEpoch = {fcst_valid_epoch} - AND fcstLen IN {ps_fcst_valid_lens} - order by fcstLen;""" - ) - for _cb_ps in cb_results: - print(f"do something {_cb_ps}") - except Exception as _e: # pylint: disable=broad-except - assert False, f"TestBuilderV01 Exception failure: {_e}" - return + + result = cluster.query( + f"""SELECT RAW fcstValidEpoch + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type="DD" + AND docType="SUMS" + AND subDocType = "SURFACE" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}'""" + ) + ps_fcst_valid_epochs = list(result) + # if len(ps_fcst_valid_epochs) == 0: + # assert False, "There is no data" + # choose the last one + fcst_valid_epoch = [] + if len(ps_fcst_valid_epochs) > 0: + fcst_valid_epoch = ps_fcst_valid_epochs[-1] + # get all the cb fcstLen values + result = cluster.query( + f"""SELECT raw fcstLen + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "SUMS" + AND subDocType = "SURFACE" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + order by fcstLen + """ + ) + ps_fcst_valid_lens = list(result) + # get the associated couchbase model data + # get the associated couchbase obs + # get the SUMS couchbase data + result = cluster.query( + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "SUMS" + AND subDocType = "SURFACE" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {ps_fcst_valid_lens} + order by fcstLen; + """ + ) + cb_results = list(result) + # print the couchbase statement + print( + "cb statement is:" + + f""" + SELECT * + FROM `{_bucket}`.{_scope}.{_collection} + WHERE type='DD' + AND docType = "SUMS" + AND subDocType = "SURFACE" + AND model='HRRR_OPS' + AND region='ALL_HRRR' + AND version='V01' + AND subset='{_collection}' + AND fcstValidEpoch = {fcst_valid_epoch} + AND fcstLen IN {ps_fcst_valid_lens} + order by fcstLen;""" + ) + for _cb_ps in cb_results: + print(f"do something {_cb_ps}") diff --git a/tests/vxingest/partial_sums_to_cb/test_unit_metar_partial_sums.py b/tests/vxingest/partial_sums_to_cb/test_unit_metar_partial_sums.py index 8b98d5c..b8d9c08 100644 --- a/tests/vxingest/partial_sums_to_cb/test_unit_metar_partial_sums.py +++ b/tests/vxingest/partial_sums_to_cb/test_unit_metar_partial_sums.py @@ -1,37 +1,32 @@ -# pylint: disable=missing-module-docstring import os from multiprocessing import JoinableQueue +import pytest from vxingest.partial_sums_to_cb.run_ingest_threads import VXIngest from vxingest.partial_sums_to_cb.vx_ingest_manager import VxIngestManager def setup_ingest(): """test setup""" - try: - _vx_ingest = VXIngest() - _vx_ingest.credentials_file = os.environ["CREDENTIALS"] - _vx_ingest.load_spec = {} - _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) - _vx_ingest.connect_cb() - 
_vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( - "JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" - ).content_as[dict]["ingest_document_ids"] - _vx_ingest.load_spec["ingest_documents"] = {} - for _id in _vx_ingest.load_spec["ingest_document_ids"]: - _vx_ingest.load_spec["ingest_documents"][_id] = _vx_ingest.collection.get( - _id - ).content_as[dict] + _vx_ingest = VXIngest() + _vx_ingest.credentials_file = os.environ["CREDENTIALS"] + _vx_ingest.load_spec = {} + _vx_ingest.cb_credentials = _vx_ingest.get_credentials(_vx_ingest.load_spec) + _vx_ingest.connect_cb() + _vx_ingest.load_spec["ingest_document_ids"] = _vx_ingest.collection.get( + "JOB-TEST:V01:METAR:CTC:CEILING:MODEL:OPS" + ).content_as[dict]["ingest_document_ids"] + _vx_ingest.load_spec["ingest_documents"] = {} + for _id in _vx_ingest.load_spec["ingest_document_ids"]: + _vx_ingest.load_spec["ingest_documents"][_id] = _vx_ingest.collection.get( + _id + ).content_as[dict] - vx_ingest_manager = VxIngestManager( - "test", _vx_ingest.load_spec, JoinableQueue(), "/tmp", None, None - ) - assert ( - vx_ingest_manager is not None - ), "vx_ingest_manager is None and should not be" - return _vx_ingest, vx_ingest_manager - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + vx_ingest_manager = VxIngestManager( + "test", _vx_ingest.load_spec, JoinableQueue(), "/tmp", None, None + ) + assert vx_ingest_manager is not None, "vx_ingest_manager is None and should not be" + return _vx_ingest, vx_ingest_manager def test_cb_connect_disconnect(): @@ -45,8 +40,8 @@ def test_cb_connect_disconnect(): assert vx_ingest is not None, "vx_ingest is None" assert local_time is not None, "local_time from CB should not be None" vx_ingest_manager.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_cb_connect_disconnect Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_cb_connect_disconnect Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() @@ -59,37 +54,36 @@ def test_credentials_and_load_spec(): vx_ingest, vx_ingest_manager = setup_ingest() assert vx_ingest.load_spec["cb_connection"]["user"] == "avid" vx_ingest_manager.close_cb() - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_credentials_and_load_spec Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_credentials_and_load_spec Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() -def test_write_load_job_to_files(): +def test_write_load_job_to_files(tmp_path): """test write the load job""" vx_ingest_manager = None try: vx_ingest, vx_ingest_manager = setup_ingest() vx_ingest.load_job_id = "test_id" - vx_ingest.output_dir = "/tmp" + vx_ingest.output_dir = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} vx_ingest.write_load_job_to_files() - os.remove("/tmp/test_id.json") - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_write_load_job_to_files Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_write_load_job_to_files Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() -def test_build_load_job_doc(): +def test_build_load_job_doc(tmp_path): """test the build load job""" vx_ingest_manager = None try: vx_ingest, vx_ingest_manager = setup_ingest() vx_ingest.load_job_id = "test_id" - vx_ingest.path = "/tmp" 
+ vx_ingest.path = tmp_path vx_ingest.load_spec["load_job_doc"] = {"test": "a line of text"} ljd = vx_ingest.build_load_job_doc("partial_sums") assert ljd[ @@ -97,8 +91,8 @@ def test_build_load_job_doc(): ].startswith( "LJ:METAR:vxingest.partial_sums_to_cb.run_ingest_threads:VXIngest" ), f"load_job ID is wrong: {ljd['id']} does not start with 'LJ:METAR:vxingest.partial_sums_to_cb.run_ingest_threads:VXIngest'" - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_build_load_job_doc Exception failure: {_e}" + except Exception as _e: + pytest.fail(f"test_build_load_job_doc Exception failure: {_e}") finally: if vx_ingest_manager is not None: vx_ingest_manager.close_cb() diff --git a/tests/vxingest/partial_sums_to_cb/test_unit_queries.py b/tests/vxingest/partial_sums_to_cb/test_unit_queries.py index 66e38ab..ad84edd 100644 --- a/tests/vxingest/partial_sums_to_cb/test_unit_queries.py +++ b/tests/vxingest/partial_sums_to_cb/test_unit_queries.py @@ -1,4 +1,3 @@ -# pylint: disable=missing-module-docstring import os from datetime import timedelta from pathlib import Path @@ -13,191 +12,144 @@ def connect_cb(): """ create a couchbase connection and maintain the collection and cluster objects. """ - # noinspection PyBroadException - try: - try: - cb_connection # is it defined pylint:disable=used-before-assignment - except NameError: - credentials_file = os.environ["CREDENTIALS"] - assert ( - Path(credentials_file).is_file() is True - ), f"*** credentials_file file {credentials_file} can not be found!" - _f = open(credentials_file, encoding="utf-8") - _yaml_data = yaml.load(_f, yaml.SafeLoader) - cb_connection = {} - cb_connection["host"] = _yaml_data["cb_host"] - cb_connection["user"] = _yaml_data["cb_user"] - cb_connection["password"] = _yaml_data["cb_password"] - cb_connection["bucket"] = _yaml_data["cb_bucket"] - cb_connection["collection"] = _yaml_data["cb_collection"] - cb_connection["scope"] = _yaml_data["cb_scope"] - _f.close() - - timeout_options = ClusterTimeoutOptions( - kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) - ) - options = ClusterOptions( - PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), - timeout_options=timeout_options, - ) - cb_connection["cluster"] = Cluster( - "couchbase://" + cb_connection["host"], options - ) - cb_connection["collection"] = ( - cb_connection["cluster"] - .bucket(cb_connection["bucket"]) - .collection(cb_connection["collection"]) - ) - return cb_connection - except Exception as _e: # pylint:disable=broad-except - assert False, f"test_unit_queries Exception failure connecting: {_e}" + + credentials_file = os.environ["CREDENTIALS"] + assert ( + Path(credentials_file).is_file() is True + ), f"*** credentials_file file {credentials_file} can not be found!" 
+ with Path(credentials_file).open(encoding="utf-8") as _f: + _yaml_data = yaml.load(_f, yaml.SafeLoader) + cb_connection = {} + cb_connection["host"] = _yaml_data["cb_host"] + cb_connection["user"] = _yaml_data["cb_user"] + cb_connection["password"] = _yaml_data["cb_password"] + cb_connection["bucket"] = _yaml_data["cb_bucket"] + cb_connection["collection"] = _yaml_data["cb_collection"] + cb_connection["scope"] = _yaml_data["cb_scope"] + + timeout_options = ClusterTimeoutOptions( + kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120) + ) + options = ClusterOptions( + PasswordAuthenticator(cb_connection["user"], cb_connection["password"]), + timeout_options=timeout_options, + ) + cb_connection["cluster"] = Cluster("couchbase://" + cb_connection["host"], options) + cb_connection["collection"] = ( + cb_connection["cluster"] + .bucket(cb_connection["bucket"]) + .collection(cb_connection["collection"]) + ) + return cb_connection def test_epoch_fcstlen_model(request): - """test""" - try: - _name = request.node.name - _expected_time = 3.0 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_epoch_fcstLen_model.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 3.0 + testdata = Path( + "tests/vxingest/partial_sums_to_cb/testdata/test_epoch_fcstLen_model.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_epoch_fcstlen_obs(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.2 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_epoch_fcstLen_obs.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.2 + testdata = Path( + 
"tests/vxingest/partial_sums_to_cb/testdata/test_epoch_fcstLen_obs.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_forecast_valid_epoch(request): - """test""" - try: - _name = request.node.name - _expected_time = 4.0 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_forecast_valid_epoch.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 4.0 + testdata = Path( + "tests/vxingest/partial_sums_to_cb/testdata/test_forecast_valid_epoch.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_region_lat_lon(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.01 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_get_region_lat_lon.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 0.01 + testdata = Path( + "tests/vxingest/partial_sums_to_cb/testdata/test_get_region_lat_lon.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of 
couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_stations(request): - """test""" - try: - _name = request.node.name - _expected_time = 3 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_get_stations.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" + _name = request.node.name + _expected_time = 3 + testdata = Path("tests/vxingest/partial_sums_to_cb/testdata/test_get_stations.n1ql") + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows before we can get to the metadata as of couchbase 4.1 + _rows = list(result.rows()) + elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() + print(f"{_name}: elapsed_time is {elapsed_time}") + assert result is not None, "{_name}: result is None" + assert ( + elapsed_time < _expected_time + ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" def test_get_threshold_descriptions(request): - """test""" - try: - _name = request.node.name - _expected_time = 0.01 - testdata = Path( - "tests/vxingest/partial_sums_to_cb/testdata/test_get_threshold_descriptions.n1ql" - ) - with testdata.open(mode="r", encoding="utf-8") as file: - _statement = file.read() - result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) - # have to read the rows before we can get to the metadata as of couchbase 4.1 - _rows = list(result.rows()) - elapsed_time = result.metadata().metrics().elapsed_time().total_seconds() - print(f"{_name}: elapsed_time is {elapsed_time}") - assert result is not None, "{_name}: result is None" - assert ( - elapsed_time < _expected_time - ), f"{_name}: elasped_time greater than {_expected_time} {elapsed_time}" - except Exception as _e: # pylint:disable=broad-except - assert False, f"{_name} Exception failure: {_e}" - - -# geo search for stations - currently not implemented -# _result1 = self.load_spec["cluster"].search_query( -# "station_geo", -# GeoBoundingBoxQuery( -# top_left=(_boundingbox["tl_lon"], _boundingbox["tl_lat"]), -# bottom_right=(_boundingbox["br_lon"], _boundingbox["br_lat"]), -# field="geo", -# ), -# SearchOptions(fields=["name"], limit=10000), -# ) + _name = request.node.name + _expected_time = 0.01 + testdata = Path( + "tests/vxingest/partial_sums_to_cb/testdata/test_get_threshold_descriptions.n1ql" + ) + with testdata.open(mode="r", encoding="utf-8") as file: + _statement = file.read() + result = connect_cb()["cluster"].query(_statement, QueryOptions(metrics=True)) + # have to read the rows 
before we can get to the metadata as of couchbase 4.1
+    _rows = list(result.rows())
+    elapsed_time = result.metadata().metrics().elapsed_time().total_seconds()
+    print(f"{_name}: elapsed_time is {elapsed_time}")
+    assert result is not None, f"{_name}: result is None"
+    assert (
+        elapsed_time < _expected_time
+    ), f"{_name}: elapsed_time greater than {_expected_time} {elapsed_time}"