Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add linting to CI #284

Merged
merged 19 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ jobs:
poetry install
- name: Format with Ruff
run: poetry run ruff format --check src tests
- name: Lint with Ruff
run: poetry run ruff check --output-format=github src tests
build-ingest:
name: Build Ingest image
runs-on: ubuntu-latest
Expand Down
19 changes: 15 additions & 4 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
select = ["E", "F", "UP", "I", "PTH", "PT", "B"]
# select = ["ALL"]
# For a list of available rules, see: https://docs.astral.sh/ruff/rules/
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"UP", # pyupgrade
"I", # isort
"PTH", # flake8-use-pathlib
"PT", # flake8-pytest-style
"B", # bugbear
"SIM", # flake8-simplify
]

ignore = [
    "E501" # Suppress line-too-long warnings: trust black's judgement on this one.
]
    "E501", # Suppress line-too-long warnings: trust the formatter's judgement on this one.
    "W505", # Suppress line-too-long warnings: trust the formatter's judgement on this one.
]
24 changes: 12 additions & 12 deletions src/vxingest/builder_common/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"""

import logging
import os
from pathlib import Path


class Builder: # pylint: disable=too-many-arguments disable=too-many-instance-attributes
class Builder:
"""
Parent class for all Builders
"""
Expand All @@ -21,38 +21,38 @@ def __init__(self, load_spec, ingest_document):
# self.do_profiling = True # set to True to enable build_document profiling
self.do_profiling = False

def initialize_document_map(self): # pylint: disable=missing-function-docstring
def initialize_document_map(self):
pass

def get_document_map(self): # pylint: disable=missing-function-docstring
def get_document_map(self):
pass

def handle_data(self, **kwargs): # pylint: disable=missing-function-docstring
def handle_data(self, **kwargs):
pass

def derive_id(self, **kwargs): # pylint: disable=missing-function-docstring
def derive_id(self, **kwargs):
pass

def load_data(self, doc, key, element): # pylint: disable=missing-function-docstring
def load_data(self, doc, key, element):
pass

def handle_document(self): # pylint: disable=missing-function-docstring
def handle_document(self):
pass

def build_document(self, queue_element): # pylint: disable=missing-function-docstring
def build_document(self, queue_element):
pass

def build_datafile_doc(self, file_name, data_file_id, origin_type): # pylint: disable=missing-function-docstring
def build_datafile_doc(self, file_name, data_file_id, origin_type):
pass

def create_data_file_id(self, subset, file_type, origin_type, file_name):
"""
This method creates a metar grib_to_cb datafile id from the parameters
"""
try:
base_name = os.path.basename(file_name)
base_name = Path(file_name).name
an_id = f"DF:{subset}:{file_type}:{origin_type}:{base_name}"
return an_id
except Exception as _e: # pylint: disable=broad-except
except Exception as _e:
logging.exception("%s create_data_file_id", self.__class__.__name__)
return None
6 changes: 3 additions & 3 deletions src/vxingest/builder_common/builder_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def convert_to_iso(an_epoch):
def initialize_data(doc):
"""initialize the data by just making sure the template data element has been removed.
All the data elements are going to be top level elements"""
if "data" in doc.keys():
if "data" in doc:
del doc["data"]
return doc

Expand Down Expand Up @@ -55,7 +55,7 @@ def get_geo_index(fcst_valid_epoch, geo):
return geo_index
else:
return latest_index
except Exception as _e: # pylint: disable=bare-except, disable=broad-except
except Exception as _e:
logging.error("CTCBuilder.get_geo_index: Exception error: %s", str(_e))
return 0

Expand All @@ -76,6 +76,6 @@ def truncate_round(_n, decimals=0):
def initialize_data_array(doc):
"""initialize the data by just making sure the template data element has been removed.
All the data elements are going to be top level elements"""
if "data" in doc.keys():
if "data" in doc:
del doc["data"]
return doc
37 changes: 17 additions & 20 deletions src/vxingest/builder_common/ingest_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
logger = logging.getLogger(__name__)


class CommonVxIngestManager(Process): # pylint:disable=too-many-instance-attributes
class CommonVxIngestManager(Process):
"""
IngestManager is a Process Thread that manages an object pool of
builders to ingest data from GSD grib2 files or netcdf files into documents that can be
Expand All @@ -47,7 +47,6 @@ class CommonVxIngestManager(Process): # pylint:disable=too-many-instance-attrib
and dies.
"""

# pylint:disable=too-many-arguments
def __init__(
self,
name,
Expand Down Expand Up @@ -78,14 +77,14 @@ def __init__(
self.logging_queue = logging_queue
self.logging_configurer = logging_configurer

if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
if not Path(self.output_dir).exists():
Path(self.output_dir).mkdir(parents=True, exist_ok=True)
if not os.access(self.output_dir, os.W_OK):
_re = RuntimeError("Output directory: %s is not writable!", self.output_dir)
logger.exception(_re)
raise _re

def process_queue_element(self, queue_element): # pylint: disable=missing-function-docstring
def process_queue_element(self, queue_element):
pass

def close_cb(self):
Expand All @@ -104,7 +103,7 @@ def connect_cb(self):
"""
logger.info("data_type_manager - Connecting to couchbase")
# get a reference to our cluster
# noinspection PyBroadException

try:
timeout_options = ClusterTimeoutOptions(
kv_timeout=timedelta(seconds=25), query_timeout=timedelta(seconds=120)
Expand All @@ -125,7 +124,7 @@ def connect_cb(self):
self.load_spec["cluster"] = self.cluster
self.load_spec["collection"] = self.collection
logger.info("Couchbase connection success")
except Exception as _e: # pylint:disable=broad-except
except Exception as _e:
logger.exception(
"*** builder_common.CommonVxIngestManager in connect_cb ***"
)
Expand All @@ -148,7 +147,6 @@ def run(self):
self.logging_configurer(self.logging_queue)
logger.info(f"Registered new process: {self.thread_name}")

# noinspection PyBroadException
try:
self.cb_credentials = self.load_spec["cb_connection"]
# get a connection
Expand Down Expand Up @@ -185,7 +183,7 @@ def run(self):
self.thread_name,
)
break
except Exception as _e: # pylint:disable=broad-except
except Exception as _e:
logger.exception("%s: *** Error in IngestManager run ***", self.thread_name)
raise _e
finally:
Expand All @@ -201,7 +199,7 @@ def write_document_to_cb(self, queue_element, document_map):
"""
# The document_map is all built now so write all the
# documents in the document_map into couchbase
# noinspection PyBroadException

try:
logger.info(
"process_element writing documents for queue_element :%s with threadName: %s",
Expand Down Expand Up @@ -237,7 +235,7 @@ def write_document_to_cb(self, queue_element, document_map):
"process_element - executing upsert: elapsed time: %s",
str(upsert_stop_time - upsert_start_time),
)
except Exception as _e: # pylint:disable=broad-except
except Exception as _e:
logger.exception(
"%s: *** Error writing to Couchbase: in process_element writing document ***",
self.thread_name,
Expand Down Expand Up @@ -266,8 +264,8 @@ def write_document_to_files(self, file_name, document_map):
else:
Path(self.output_dir).mkdir(parents=True, exist_ok=True)
try:
file_name = os.path.basename(file_name) + ".json"
complete_file_name = os.path.join(self.output_dir, file_name)
file_name = Path(file_name).name + ".json"
complete_file_name = Path(self.output_dir) / file_name
# how many documents are we writing? Log it for alert
num_documents = len(list(document_map.values()))
logger.info(
Expand All @@ -276,17 +274,16 @@ def write_document_to_files(self, file_name, document_map):
num_documents,
complete_file_name,
)
_f = open(complete_file_name, "w", encoding="utf-8")
# we need to write out a list of the values of the _document_map for cbimport
json_data = json.dumps(list(document_map.values()))
_f.write(json_data)
_f.close()
except Exception as _e1: # pylint:disable=broad-except
with Path(complete_file_name).open("w", encoding="utf-8") as _f:
# we need to write out a list of the values of the _document_map for cbimport
json_data = json.dumps(list(document_map.values()))
_f.write(json_data)
except Exception as _e1:
logger.exception(
"write_document_to_files - trying write: Got Exception %s",
str(_e1),
)
except Exception as _e: # pylint:disable=broad-except
except Exception as _e:
logger.exception(
": *** {self.thread_name} Error writing to files: in process_element writing document*** %s",
str(_e),
Expand Down
26 changes: 11 additions & 15 deletions src/vxingest/builder_common/load_backup_ingest_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(self):

def run(self, args):
"thread start"
# noinspection PyBroadException

try:
credentials_file = args["credentials_file"]
# check for existence of file
Expand All @@ -63,29 +63,25 @@ def run(self, args):
+ credentials_file
+ " can not be found!"
)
_f = open(credentials_file, encoding="utf-8")
yaml_data = yaml.load(_f, yaml.SafeLoader)
with Path(credentials_file).open(encoding="utf-8") as _f:
yaml_data = yaml.load(_f, yaml.SafeLoader)
self.cb_credentials["host"] = yaml_data["cb_host"]
self.cb_credentials["user"] = yaml_data["cb_user"]
self.cb_credentials["password"] = yaml_data["cb_password"]
_f.close()

# Get JSON data as a dict
f_name = args["file_name"]
# Opening JSON file
_f = open(f_name, encoding="utf-8")
# returns JSON object as
# a dictionary
list_data = json.load(_f)
with Path(f_name).open(encoding="utf-8") as _f:
list_data = json.load(_f)
data = {}
for elem in list_data:
_id = elem["id"]
del elem["id"]
data[_id] = elem
_f.close()
self.connect_cb()
self.collection.upsert_multi(data)
except: # pylint: disable=bare-except, disable=broad-except
print(": *** %s Error in multi-upsert *** " + str(sys.exc_info()))
except Exception as e:
print(f" *** Error in multi-upsert *** {e}")
finally:
# close any mysql connections
self.close_cb()
Expand All @@ -98,7 +94,7 @@ def close_cb(self):
def connect_cb(self):
"""Connect to database"""
# get a reference to our cluster
# noinspection PyBroadException

try:
options = ClusterOptions(
PasswordAuthenticator(
Expand All @@ -109,8 +105,8 @@ def connect_cb(self):
"couchbase://" + self.cb_credentials["host"], options
)
self.collection = self.cluster.bucket("mdata").default_collection()
except: # pylint: disable=bare-except, disable=broad-except
print("*** %s in connect_cb ***" + str(sys.exc_info()))
except Exception as e:
print(f"*** Error in connect_cb *** {e}")
sys.exit("*** Error when connecting to mysql database: ")

def main(self):
Expand Down
Loading