From 5bb459c90dd1629666ba72f8d637c04bd9ff58ed Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Thu, 20 Jun 2024 16:01:31 -0400 Subject: [PATCH 1/5] mv API to optional group; make opendata default --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 359b8b484..d91bb2115 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,9 +34,10 @@ dependencies = [ "pydash>=4.1.0", "jsonschema>=3.1.1", "tqdm>=4.19.6", + "pandas>=2.1.4", + "jsonlines>=4.0.0", "aioitertools>=0.5.1", "numpy>=1.17.3", - "fastapi>=0.42.0", "pyzmq>=24.0.1", "dnspython>=1.16.0", "sshtunnel>=0.1.5", @@ -44,7 +45,6 @@ dependencies = [ "orjson>=3.9.0", "boto3>=1.20.41", "python-dateutil>=2.8.2", - "uvicorn>=0.18.3", ] [project.urls] @@ -63,7 +63,7 @@ montydb = ["montydb>=2.3.12"] mongogrant = ["mongogrant>=0.3.1"] notebook_runner = ["IPython>=8.11", "nbformat>=5.0", "regex>=2020.6"] azure = ["azure-storage-blob>=12.16.0", "azure-identity>=1.12.0"] -open_data = ["pandas>=2.1.4", "jsonlines>=4.0.0"] +api = ["fastapi>=0.42.0","uvicorn>=0.18.3"] testing = [ "pytest", "pytest-cov", From 6756076c34d69956d5cc0286e952aa7da69da75e Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Thu, 20 Jun 2024 16:07:32 -0400 Subject: [PATCH 2/5] update pyproject.toml for numpy 2.0 --- pyproject.toml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d91bb2115..e383d2e9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,9 @@ line-length = 120 [tool.ruff] line-length = 120 +src = ["src"] + +[tool.ruff.lint] select = [ "B", # flake8-bugbear "C4", # flake8-comprehensions @@ -132,6 +135,7 @@ select = [ "UP", # pyupgrade "W", # pycodestyle warning "YTT", # flake8-2020 + "NPY201", #numpy 2.0 ] ignore = [ "B023", # Function definition does not bind loop variable @@ -155,10 +159,8 @@ ignore = [ ] pydocstyle.convention = "google" isort.split-on-trailing-comma = false -src = ["src"] - -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] "tasks.py" = ["D"] "tests/*" = ["D"] From a81006db1492ea0a8dbf572adecfff591e5088b1 Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Thu, 20 Jun 2024 16:12:48 -0400 Subject: [PATCH 3/5] pre-commit autoupdate --- .pre-commit-config.yaml | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19c16f0e8..5308c8034 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,18 +10,18 @@ ci: repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.280 + rev: v0.4.10 hooks: - id: ruff args: [--fix, --show-fixes, --ignore, D, --extend-select, D411] - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 24.4.2 hooks: - id: black - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.3.0 hooks: - id: codespell stages: [commit, commit-msg] @@ -29,7 +29,7 @@ repos: additional_dependencies: [tomli] # needed to read pyproject.toml below py3.11 - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.6.0 hooks: - id: check-case-conflict - id: check-symlinks diff --git a/pyproject.toml b/pyproject.toml index e383d2e9c..ae699b9e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,7 +135,7 @@ select = [ "UP", # pyupgrade "W", # pycodestyle warning "YTT", # flake8-2020 - "NPY201", #numpy 2.0 + "NPY201", # numpy 2.0 ] ignore = [ "B023", # Function definition does not bind loop variable From b2bc35562771a658da13d82b78e08ac552187baa Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Thu, 20 Jun 2024 16:14:24 -0400 Subject: [PATCH 4/5] ruff check . --fix + pre-commit fixes --- src/maggma/api/API.py | 1 - src/maggma/api/__init__.py | 2 +- src/maggma/api/models.py | 2 -- src/maggma/api/query_operator/dynamic.py | 8 +++----- src/maggma/api/query_operator/pagination.py | 4 +--- src/maggma/api/query_operator/sparse_fields.py | 5 +---- src/maggma/api/query_operator/submission.py | 2 +- src/maggma/api/resource/aggregation.py | 1 - src/maggma/api/resource/core.py | 2 -- src/maggma/api/resource/post_resource.py | 1 - src/maggma/api/resource/read_resource.py | 1 - src/maggma/api/resource/s3_url.py | 1 - src/maggma/api/resource/submission.py | 2 -- src/maggma/api/resource/utils.py | 2 +- src/maggma/api/utils.py | 2 -- src/maggma/builders/group_builder.py | 3 +-- src/maggma/builders/map_builder.py | 8 +++----- src/maggma/cli/serial.py | 1 - src/maggma/cli/source_loader.py | 1 - src/maggma/cli/sources/__init__.py | 2 +- src/maggma/core/__init__.py | 3 ++- src/maggma/stores/__init__.py | 3 ++- src/maggma/stores/advanced_stores.py | 4 ++-- src/maggma/stores/aws.py | 3 ++- src/maggma/stores/azure.py | 7 +------ src/maggma/stores/compound_stores.py | 10 +++++----- src/maggma/stores/gridfs.py | 4 ---- src/maggma/stores/mongolike.py | 5 ----- src/maggma/stores/open_data.py | 4 ++-- src/maggma/stores/ssh_tunnel.py | 1 - src/maggma/utils.py | 4 +--- src/maggma/validators.py | 2 -- tests/builders/test_projection_builder.py | 5 +++-- 33 files changed, 33 insertions(+), 73 deletions(-) diff --git a/src/maggma/api/API.py b/src/maggma/api/API.py index fb7b0cdc9..3078388d5 100644 --- a/src/maggma/api/API.py +++ b/src/maggma/api/API.py @@ -94,7 +94,6 @@ def app(self): @app.head("/heartbeat", include_in_schema=False) def heartbeat(): """API Heartbeat for Load Balancing.""" - return { "status": "OK", "time": datetime.utcnow(), diff --git a/src/maggma/api/__init__.py b/src/maggma/api/__init__.py index 97b6261f5..974e269de 100644 --- a/src/maggma/api/__init__.py +++ b/src/maggma/api/__init__.py @@ -1 +1 @@ -""" Simple API Interface for Maggma. """ +"""Simple API Interface for Maggma.""" diff --git a/src/maggma/api/models.py b/src/maggma/api/models.py index 0f3c0d413..32677bc40 100644 --- a/src/maggma/api/models.py +++ b/src/maggma/api/models.py @@ -12,7 +12,6 @@ class Meta(BaseModel): - """ Meta information for the MAPI Response. """ @@ -76,7 +75,6 @@ def default_meta(cls, v, values): class S3URLDoc(BaseModel): - """ S3 pre-signed URL data returned by the S3 URL resource. """ diff --git a/src/maggma/api/query_operator/dynamic.py b/src/maggma/api/query_operator/dynamic.py index 5d4b9b8f9..792a6af78 100644 --- a/src/maggma/api/query_operator/dynamic.py +++ b/src/maggma/api/query_operator/dynamic.py @@ -75,7 +75,7 @@ def query(**kwargs) -> STORE_PARAMS: self.query = query # type: ignore def query(self): - "Stub query function for abstract class." + """Stub query function for abstract class.""" @abstractmethod def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]: @@ -105,7 +105,7 @@ def as_dict(self) -> dict: class NumericQuery(DynamicQueryOperator): - "Query Operator to enable searching on numeric fields." + """Query Operator to enable searching on numeric fields.""" def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]: """ @@ -115,7 +115,6 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query object, and callable to convert it into a query dict. """ - ops = [] field_type = field.annotation @@ -190,7 +189,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, class StringQueryOperator(DynamicQueryOperator): - "Query Operator to enable searching on numeric fields." + """Query Operator to enable searching on numeric fields.""" def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]: """ @@ -200,7 +199,6 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query object, and callable to convert it into a query dict. """ - ops = [] field_type: type = field.annotation diff --git a/src/maggma/api/query_operator/pagination.py b/src/maggma/api/query_operator/pagination.py index 79aa1f319..136891e92 100644 --- a/src/maggma/api/query_operator/pagination.py +++ b/src/maggma/api/query_operator/pagination.py @@ -13,7 +13,6 @@ def __init__(self, default_limit: int = 100, max_limit: int = 1000): default_limit: the default number of documents to return max_limit: max number of documents to return. """ - self.default_limit = default_limit self.max_limit = max_limit @@ -39,7 +38,6 @@ def query( """ Pagination parameters for the API Endpoint. """ - if _page is not None: if _per_page > max_limit: raise HTTPException( @@ -78,7 +76,7 @@ def query( self.query = query # type: ignore def query(self): - "Stub query function for abstract class." + """Stub query function for abstract class.""" def meta(self) -> dict: """ diff --git a/src/maggma/api/query_operator/sparse_fields.py b/src/maggma/api/query_operator/sparse_fields.py index f992c8630..72a435b68 100644 --- a/src/maggma/api/query_operator/sparse_fields.py +++ b/src/maggma/api/query_operator/sparse_fields.py @@ -15,7 +15,6 @@ def __init__(self, model: type[BaseModel], default_fields: Optional[list[str]] = model: PyDantic Model that represents the underlying data source default_fields: default fields to return in the API response if no fields are explicitly requested. """ - self.model = model model_name = self.model.__name__ # type: ignore @@ -34,7 +33,6 @@ def query( """ Pagination parameters for the API Endpoint. """ - properties = _fields.split(",") if isinstance(_fields, str) else self.default_fields if _all_fields: properties = model_fields @@ -44,7 +42,7 @@ def query( self.query = query # type: ignore def query(self): - "Stub query function for abstract class." + """Stub query function for abstract class.""" def meta(self) -> dict: """ @@ -56,7 +54,6 @@ def as_dict(self) -> dict: """ Special as_dict implemented to convert pydantic models into strings. """ - d = super().as_dict() # Ensures sub-classes serialize correctly d["model"] = f"{self.model.__module__}.{self.model.__name__}" # type: ignore return d diff --git a/src/maggma/api/query_operator/submission.py b/src/maggma/api/query_operator/submission.py index 4b75f7694..8c26dc57a 100644 --- a/src/maggma/api/query_operator/submission.py +++ b/src/maggma/api/query_operator/submission.py @@ -40,4 +40,4 @@ def query( self.query = query def query(self): - "Stub query function for abstract class." + """Stub query function for abstract class.""" diff --git a/src/maggma/api/resource/aggregation.py b/src/maggma/api/resource/aggregation.py index a3bcf3ec6..6bdc9ad38 100644 --- a/src/maggma/api/resource/aggregation.py +++ b/src/maggma/api/resource/aggregation.py @@ -60,7 +60,6 @@ def prepare_endpoint(self): Internal method to prepare the endpoint by setting up default handlers for routes. """ - self.build_dynamic_model_search() def build_dynamic_model_search(self): diff --git a/src/maggma/api/resource/core.py b/src/maggma/api/resource/core.py index bb183c8dd..636c76053 100644 --- a/src/maggma/api/resource/core.py +++ b/src/maggma/api/resource/core.py @@ -52,7 +52,6 @@ def redirect_unslashed(): Redirects unforward slashed url to resource url with the forward slash. """ - url = self.router.url_path_for("/") return RedirectResponse(url=url, status_code=301) @@ -71,7 +70,6 @@ def as_dict(self) -> dict: """ Special as_dict implemented to convert pydantic models into strings. """ - d = super().as_dict() # Ensures sub-classes serialize correctly d["model"] = f"{self.model.__module__}.{self.model.__name__}" return d diff --git a/src/maggma/api/resource/post_resource.py b/src/maggma/api/resource/post_resource.py index b60b56e3d..63312dc57 100644 --- a/src/maggma/api/resource/post_resource.py +++ b/src/maggma/api/resource/post_resource.py @@ -75,7 +75,6 @@ def prepare_endpoint(self): Internal method to prepare the endpoint by setting up default handlers for routes. """ - self.build_dynamic_model_search() def build_dynamic_model_search(self): diff --git a/src/maggma/api/resource/read_resource.py b/src/maggma/api/resource/read_resource.py index a256e1191..1bf3aa5ee 100644 --- a/src/maggma/api/resource/read_resource.py +++ b/src/maggma/api/resource/read_resource.py @@ -100,7 +100,6 @@ def prepare_endpoint(self): Internal method to prepare the endpoint by setting up default handlers for routes. """ - if self.enable_get_by_key: self.build_get_by_key() diff --git a/src/maggma/api/resource/s3_url.py b/src/maggma/api/resource/s3_url.py index a6a7b342a..26e0f994e 100644 --- a/src/maggma/api/resource/s3_url.py +++ b/src/maggma/api/resource/s3_url.py @@ -56,7 +56,6 @@ def prepare_endpoint(self): Internal method to prepare the endpoint by setting up default handlers for routes. """ - self.build_get_by_key() def build_get_by_key(self): diff --git a/src/maggma/api/resource/submission.py b/src/maggma/api/resource/submission.py index 0a7700ed0..0f09efd80 100644 --- a/src/maggma/api/resource/submission.py +++ b/src/maggma/api/resource/submission.py @@ -64,7 +64,6 @@ def __init__( post_sub_path: POST sub-URL path for the resource. patch_sub_path: PATCH sub-URL path for the resource. """ - if isinstance(state_enum, Enum) and default_state not in [entry.value for entry in state_enum]: # type: ignore raise RuntimeError("If data is stateful a state enum and valid default value must be provided") @@ -118,7 +117,6 @@ def prepare_endpoint(self): Internal method to prepare the endpoint by setting up default handlers for routes. """ - if self.enable_default_search: self.build_search_data() diff --git a/src/maggma/api/resource/utils.py b/src/maggma/api/resource/utils.py index 9ff21e652..cb75fc16d 100644 --- a/src/maggma/api/resource/utils.py +++ b/src/maggma/api/resource/utils.py @@ -56,7 +56,7 @@ def generate_query_pipeline(query: dict, store: Store): pipeline.append(sort_dict) pipeline.append({"$project": projection_dict}) - pipeline.append({"$skip": query["skip"] if "skip" in query else 0}) + pipeline.append({"$skip": query.get("skip", 0)}) if query.get("limit", False): pipeline.append({"$limit": query["limit"]}) diff --git a/src/maggma/api/utils.py b/src/maggma/api/utils.py index 18df8d12f..bb4dfcd02 100644 --- a/src/maggma/api/utils.py +++ b/src/maggma/api/utils.py @@ -63,7 +63,6 @@ def attach_signature(function: Callable, defaults: dict, annotations: dict): defaults: dictionary of parameters -> default values annotations: dictionary of type annotations for the parameters """ - required_params = [ inspect.Parameter( param, @@ -106,7 +105,6 @@ def api_sanitize( allow_dict_msonable (bool): Whether to allow dictionaries in place of MSONable quantities. Defaults to False """ - models = [ model for model in get_flat_models_from_model(pydantic_model) if issubclass(model, BaseModel) ] # type: list[BaseModel] diff --git a/src/maggma/builders/group_builder.py b/src/maggma/builders/group_builder.py index 2ad7bacb9..6d58f4006 100644 --- a/src/maggma/builders/group_builder.py +++ b/src/maggma/builders/group_builder.py @@ -1,6 +1,7 @@ """ Many-to-Many GroupBuilder. """ + import traceback from abc import ABCMeta, abstractmethod from collections.abc import Iterable, Iterator @@ -183,7 +184,6 @@ def get_ids_to_process(self) -> Iterable: """ Gets the IDs that need to be processed. """ - query = self.query or {} distinct_from_target = list(self.target.distinct(self._target_keys_field, criteria=query)) @@ -217,7 +217,6 @@ def get_groups_from_keys(self, keys) -> set[tuple]: """ Get the groups by grouping_keys for these documents. """ - grouping_keys = self.grouping_keys groups: set[tuple] = set() diff --git a/src/maggma/builders/map_builder.py b/src/maggma/builders/map_builder.py index 75f0f60b2..1e1af4a4e 100644 --- a/src/maggma/builders/map_builder.py +++ b/src/maggma/builders/map_builder.py @@ -1,6 +1,7 @@ """ One-to-One Map Builder and a simple CopyBuilder implementation. """ + import traceback from abc import ABCMeta, abstractmethod from collections.abc import Iterator @@ -102,7 +103,6 @@ def get_items(self): Generic get items for Map Builder designed to perform incremental building. """ - self.logger.info(f"Starting {self.__class__.__name__} Builder") self.ensure_indexes() @@ -126,20 +126,18 @@ def get_items(self): self.total = len(keys) for chunked_keys in grouper(keys, self.chunk_size): chunked_keys = list(chunked_keys) - for doc in list( + yield from list( self.source.query( criteria={self.source.key: {"$in": chunked_keys}}, properties=projection, ) - ): - yield doc + ) def process_item(self, item: dict): """ Generic process items to process a dictionary using a map function. """ - self.logger.debug(f"Processing: {item[self.source.key]}") time_start = time() diff --git a/src/maggma/cli/serial.py b/src/maggma/cli/serial.py index 1c6c9fd5a..c2c28d272 100644 --- a/src/maggma/cli/serial.py +++ b/src/maggma/cli/serial.py @@ -14,7 +14,6 @@ def serial(builder: Builder, no_bars=False): """ Runs the builders using a single process. """ - logger = logging.getLogger("SerialProcessor") builder.connect() diff --git a/src/maggma/cli/source_loader.py b/src/maggma/cli/source_loader.py index 30bfc880a..e2d3f646e 100644 --- a/src/maggma/cli/source_loader.py +++ b/src/maggma/cli/source_loader.py @@ -136,7 +136,6 @@ def find_matching_file(segments, curr_path="./"): in the path relative to the current path Requires all segments match the file path. """ - # If we've gotten to the end of the segment match check to see if a file exists if len(segments) == 0: if Path(curr_path + ".py").exists(): diff --git a/src/maggma/cli/sources/__init__.py b/src/maggma/cli/sources/__init__.py index 90cd0cb81..1e0d3331b 100644 --- a/src/maggma/cli/sources/__init__.py +++ b/src/maggma/cli/sources/__init__.py @@ -1 +1 @@ -""" Dummy module to allow for loading dynamic source files. """ +"""Dummy module to allow for loading dynamic source files.""" diff --git a/src/maggma/core/__init__.py b/src/maggma/core/__init__.py index 9c8cad383..c85aea36a 100644 --- a/src/maggma/core/__init__.py +++ b/src/maggma/core/__init__.py @@ -1,4 +1,5 @@ -""" Core specifications for Maggma. """ +"""Core specifications for Maggma.""" + from maggma.core.builder import Builder from maggma.core.store import DateTimeFormat, Sort, Store, StoreError from maggma.core.validator import Validator diff --git a/src/maggma/stores/__init__.py b/src/maggma/stores/__init__.py index 2b31fb0c1..c7b7f526c 100644 --- a/src/maggma/stores/__init__.py +++ b/src/maggma/stores/__init__.py @@ -1,4 +1,5 @@ -""" Root store module with easy imports for implemented Stores. """ +"""Root store module with easy imports for implemented Stores.""" + from maggma.core import Store from maggma.stores.advanced_stores import AliasingStore, MongograntStore, SandboxStore, VaultStore from maggma.stores.aws import S3Store diff --git a/src/maggma/stores/advanced_stores.py b/src/maggma/stores/advanced_stores.py index a93f72fe5..5b342bad5 100644 --- a/src/maggma/stores/advanced_stores.py +++ b/src/maggma/stores/advanced_stores.py @@ -1,6 +1,7 @@ """ Advanced Stores for behavior outside normal access patterns. """ + import json import os from collections.abc import Iterator @@ -258,7 +259,6 @@ def query( skip: number documents to skip limit: limit on total number of documents returned """ - criteria = criteria if criteria else {} if properties is not None: @@ -314,7 +314,7 @@ def groupby( keys = keys if isinstance(keys, list) else [keys] # Make the aliasing transformations on keys - keys = [self.aliases[k] if k in self.aliases else k for k in keys] + keys = [self.aliases.get(k, k) for k in keys] # Update criteria and properties based on aliases criteria = criteria if criteria else {} diff --git a/src/maggma/stores/aws.py b/src/maggma/stores/aws.py index 13f45b76e..3477fffc0 100644 --- a/src/maggma/stores/aws.py +++ b/src/maggma/stores/aws.py @@ -375,7 +375,8 @@ def _get_endpoint_url(self): def _get_bucket(self): """If on the main thread return the bucket created above, else create a new - bucket on each thread.""" + bucket on each thread. + """ if threading.current_thread().name == "MainThread": return self.s3_bucket diff --git a/src/maggma/stores/azure.py b/src/maggma/stores/azure.py index 906bd7b14..6203f0524 100644 --- a/src/maggma/stores/azure.py +++ b/src/maggma/stores/azure.py @@ -1,6 +1,7 @@ """ Advanced Stores for connecting to Microsoft Azure data. """ + import os import threading import warnings @@ -131,7 +132,6 @@ def connect(self, *args, **kwargs): # lgtm[py/conflicting-attributes] """ Connect to the source data. """ - service_client = self._get_service_client() if not self.service: @@ -177,7 +177,6 @@ def count(self, criteria: Optional[dict] = None) -> int: Args: criteria: PyMongo filter for documents to count in """ - return self.index.count(criteria) def query( @@ -200,7 +199,6 @@ def query( limit: limit on total number of documents returned """ - if self.container is None or self.service is None: raise RuntimeError("The store has not been connected") @@ -315,7 +313,6 @@ def update( field is to be used additional_metadata: field(s) to include in the blob store's metadata """ - if self.container is None or self.service is None: raise RuntimeError("The store has not been connected") @@ -443,7 +440,6 @@ def _sanitize_key(self, key): Sanitize keys to store metadata. The metadata keys should adhere to the naming rules for C# identifiers. """ - new_key = str(key) for k, v in self.key_sanitize_dict.items(): new_key = new_key.replace(k, v) @@ -503,7 +499,6 @@ def rebuild_index_from_blob_data(self, **kwargs): Relies on the index document being stores as the metadata for the file This can help recover lost databases. """ - objects = self.container.list_blobs(name_starts_with=self.sub_dir) for obj in objects: # handle the case where there are subdirs in the chosen container diff --git a/src/maggma/stores/compound_stores.py b/src/maggma/stores/compound_stores.py index 1e2ce1bb8..8bc58450e 100644 --- a/src/maggma/stores/compound_stores.py +++ b/src/maggma/stores/compound_stores.py @@ -1,4 +1,5 @@ -""" Special stores that combine underlying Stores together. """ +"""Special stores that combine underlying Stores together.""" + from collections.abc import Iterator from datetime import datetime from itertools import groupby @@ -105,7 +106,7 @@ def _collection(self): @property def nonmain_names(self) -> list: """ - alll non-main collection names. + all non-main collection names. """ return list(set(self.collection_names) - {self.main}) @@ -445,8 +446,7 @@ def query( """ # TODO: skip, sort and limit are broken. implement properly for store in self.stores: - for d in store.query(criteria=criteria, properties=properties): - yield d + yield from store.query(criteria=criteria, properties=properties) def groupby( self, @@ -492,7 +492,7 @@ def groupby( docs.extend(group) def key_set(d: dict) -> tuple: - "index function based on passed in keys." + """Index function based on passed in keys.""" return tuple(d.get(k) for k in keys) sorted_docs = sorted(docs, key=key_set) diff --git a/src/maggma/stores/gridfs.py b/src/maggma/stores/gridfs.py index bb6b9ae0f..5dc155753 100644 --- a/src/maggma/stores/gridfs.py +++ b/src/maggma/stores/gridfs.py @@ -75,7 +75,6 @@ def __init__( auth_source: The database to authenticate on. Defaults to the database name. ssh_tunnel: An SSHTunnel object to use. """ - self.database = database self.collection_name = collection_name self.host = host @@ -316,7 +315,6 @@ def groupby( Returns: generator returning tuples of (dict, list of docs) """ - criteria = self.transform_criteria(criteria) if isinstance(criteria, dict) else criteria keys = [keys] if not isinstance(keys, list) else keys keys = [ @@ -363,7 +361,6 @@ def update( field is to be used additional_metadata: field(s) to include in the gridfs metadata """ - if not isinstance(docs, list): docs = [docs] @@ -464,7 +461,6 @@ def __init__( ensure_metadata: ensure returned documents have the metadata fields searchable_fields: fields to keep in the index store. """ - self.uri = uri # parse the dbname from the uri diff --git a/src/maggma/stores/mongolike.py b/src/maggma/stores/mongolike.py index 83d4098fa..40e9a1856 100644 --- a/src/maggma/stores/mongolike.py +++ b/src/maggma/stores/mongolike.py @@ -169,7 +169,6 @@ def distinct(self, field: str, criteria: Optional[dict] = None, all_exist: bool field: the field(s) to get distinct values for criteria: PyMongo filter for documents to search in """ - criteria = criteria or {} try: distinct_vals = self._collection.distinct(field, criteria) @@ -270,7 +269,6 @@ def count( hint: Dictionary of indexes to use as hints for query optimizer. Keys are field names and values are 1 for ascending or -1 for descending. """ - criteria = criteria if criteria else {} hint_list = ( @@ -351,7 +349,6 @@ def ensure_index(self, key: str, unique: Optional[bool] = False) -> bool: Returns: bool indicating if the index exists/was created """ - if confirm_field_index(self._collection, key): return True @@ -372,7 +369,6 @@ def update(self, docs: Union[list[dict], dict], key: Union[list, str, None] = No a single field, or None if the Store's key field is to be used """ - requests = [] if not isinstance(docs, list): @@ -916,7 +912,6 @@ def update(self, docs: Union[list[dict], dict], key: Union[list, str, None] = No multiple fields, a single field, or None if the Store's key field is to be used. """ - if not isinstance(docs, list): docs = [docs] diff --git a/src/maggma/stores/open_data.py b/src/maggma/stores/open_data.py index 876313c08..c4eb1514c 100644 --- a/src/maggma/stores/open_data.py +++ b/src/maggma/stores/open_data.py @@ -492,7 +492,7 @@ def __init__( self.object_grouping = object_grouping if object_grouping is not None else ["nelements", "symmetry_number"] if access_as_public_bucket: - kwargs["s3_resource_kwargs"] = kwargs["s3_resource_kwargs"] if "s3_resource_kwargs" in kwargs else {} + kwargs["s3_resource_kwargs"] = kwargs.get("s3_resource_kwargs", {}) kwargs["s3_resource_kwargs"]["config"] = Config(signature_version=UNSIGNED) super().__init__(**kwargs) self.searchable_fields = list( @@ -628,7 +628,7 @@ def _write_doc_and_update_index(self, items: pd.DataFrame, index: pd.DataFrame) self.index.update(index) def _write_doc_to_s3(self, doc: pd.DataFrame, index: pd.DataFrame) -> None: - doc = doc.replace({pd.NaT: None}).replace({"NaT": None}).replace({np.NaN: None}) + doc = doc.replace({pd.NaT: None}).replace({"NaT": None}).replace({np.nan: None}) string_io = StringIO() with jsonlines.Writer(string_io, dumps=json_util.dumps) as writer: diff --git a/src/maggma/stores/ssh_tunnel.py b/src/maggma/stores/ssh_tunnel.py index 510837e79..5f53a34fc 100644 --- a/src/maggma/stores/ssh_tunnel.py +++ b/src/maggma/stores/ssh_tunnel.py @@ -32,7 +32,6 @@ def __init__( private_key: ssh private key to authenticate to the tunnel server kwargs: any extra args passed to the SSHTunnelForwarder. """ - self.tunnel_server_address = tunnel_server_address self.remote_server_address = remote_server_address self.local_port = local_port diff --git a/src/maggma/utils.py b/src/maggma/utils.py index c4fe278e9..6abec0f51 100644 --- a/src/maggma/utils.py +++ b/src/maggma/utils.py @@ -1,6 +1,7 @@ """ Utilities to help with maggma functions. """ + import itertools import logging import signal @@ -89,7 +90,6 @@ def to_isoformat_ceil_ms(dt: Union[datetime, str]) -> str: def to_dt(s: Union[datetime, str]) -> datetime: """Convert an ISO 8601 string to a datetime.""" - if isinstance(s, str): return parser.parse(s) if isinstance(s, datetime): @@ -112,7 +112,6 @@ def recursive_update(d: dict, u: dict): d (dict): dict to update u (dict): updates to propagate """ - for k, v in u.items(): if k in d: if isinstance(v, dict) and isinstance(d[k], dict): @@ -212,7 +211,6 @@ def dynamic_import(abs_module_path: str, class_name: Optional[str] = None): """ Dynamic class importer from: https://www.bnmetrics.com/blog/dynamic-import-in-python3. """ - if class_name is None: class_name = abs_module_path.split(".")[-1] abs_module_path = ".".join(abs_module_path.split(".")[:-1]) diff --git a/src/maggma/validators.py b/src/maggma/validators.py index 7228ba0e7..2f5749cf2 100644 --- a/src/maggma/validators.py +++ b/src/maggma/validators.py @@ -4,7 +4,6 @@ that Store. """ - from jsonschema import ValidationError, validate from jsonschema.validators import validator_for @@ -84,7 +83,6 @@ def validation_errors(self, doc: dict) -> list[str]: Args: doc - document to check """ - if self.is_valid(doc): return [] diff --git a/tests/builders/test_projection_builder.py b/tests/builders/test_projection_builder.py index 345947e4f..f11f9abe2 100644 --- a/tests/builders/test_projection_builder.py +++ b/tests/builders/test_projection_builder.py @@ -1,6 +1,7 @@ """ Tests for Projection_Builder """ + import pytest from maggma.builders.projection_builder import Projection_Builder @@ -97,7 +98,7 @@ def test_update_targets(source1, source2, target): assert target.query_one(criteria={"k": 0})["newa"] == "a" assert target.query_one(criteria={"k": 0})["d"] == "d" assert target.query_one(criteria={"k": 10})["d"] == "d" - assert "a" not in target.query_one(criteria={"k": 10}).keys() + assert "a" not in target.query_one(criteria={"k": 10}) def test_run(source1, source2, target): @@ -107,7 +108,7 @@ def test_run(source1, source2, target): assert target.query_one(criteria={"k": 0})["a"] == "a" assert target.query_one(criteria={"k": 0})["d"] == "d" assert target.query_one(criteria={"k": 10})["d"] == "d" - assert "a" not in target.query_one(criteria={"k": 10}).keys() + assert "a" not in target.query_one(criteria={"k": 10}) def test_query(source1, source2, target): From c450ad59b1b20c68305fa03e244455158bd074f3 Mon Sep 17 00:00:00 2001 From: Ryan Kingsbury Date: Thu, 20 Jun 2024 16:21:13 -0400 Subject: [PATCH 5/5] pre-commit run --all-files --- docs/getting_started/simple_builder.md | 2 +- src/maggma/__init__.py | 1 + src/maggma/cli/multiprocessing.py | 5 +++-- tests/builders/test_copy_builder.py | 1 + tests/builders/test_group_builder.py | 1 + tests/stores/test_advanced_stores.py | 1 + tests/test_utils.py | 1 + tests/test_validator.py | 1 + 8 files changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/getting_started/simple_builder.md b/docs/getting_started/simple_builder.md index 199218c96..dddc69611 100644 --- a/docs/getting_started/simple_builder.md +++ b/docs/getting_started/simple_builder.md @@ -112,7 +112,7 @@ One advantage of using the generator approach is it is less memory intensive tha `process_item` just has to do the parallelizable work on each item. Since the item is whatever comes out of `get_items`, you know exactly what it should be. It may be a single document, a list of documents, a mapping, a set, etc. -Our simple process item just has to multiply one field by `self.mulitplier`: +Our simple process item just has to multiply one field by `self.multiplier`: ``` python diff --git a/src/maggma/__init__.py b/src/maggma/__init__.py index 39ade96ae..4e3e7bcf6 100644 --- a/src/maggma/__init__.py +++ b/src/maggma/__init__.py @@ -1,4 +1,5 @@ """Primary Maggma module.""" + from importlib.metadata import PackageNotFoundError, version try: diff --git a/src/maggma/cli/multiprocessing.py b/src/maggma/cli/multiprocessing.py index 42a4e4dfb..ce5885728 100644 --- a/src/maggma/cli/multiprocessing.py +++ b/src/maggma/cli/multiprocessing.py @@ -85,8 +85,9 @@ async def get_from_iterator(self): future = loop.run_in_executor(self.executor, safe_dispatch, (self.func, item)) self.tasks[idx] = future - - loop.create_task(self.process_and_release(idx)) + # TODO - line below raises RUF006 error. Unsure about the best way to + # resolve. See https://docs.astral.sh/ruff/rules/asyncio-dangling-task/ + loop.create_task(self.process_and_release(idx)) # noqa: RUF006 await gather(*self.tasks.values()) self.results.put_nowait(self.done_sentinel) diff --git a/tests/builders/test_copy_builder.py b/tests/builders/test_copy_builder.py index 6a0993844..e0d5790fe 100644 --- a/tests/builders/test_copy_builder.py +++ b/tests/builders/test_copy_builder.py @@ -1,6 +1,7 @@ """ Tests for MapBuilder """ + from datetime import datetime, timedelta import pytest diff --git a/tests/builders/test_group_builder.py b/tests/builders/test_group_builder.py index fe9d63ca0..b3e18295f 100644 --- a/tests/builders/test_group_builder.py +++ b/tests/builders/test_group_builder.py @@ -1,6 +1,7 @@ """ Tests for group builder """ + from datetime import datetime from random import randint diff --git a/tests/stores/test_advanced_stores.py b/tests/stores/test_advanced_stores.py index 4695632d3..cc402e841 100644 --- a/tests/stores/test_advanced_stores.py +++ b/tests/stores/test_advanced_stores.py @@ -1,6 +1,7 @@ """ Tests for advanced stores """ + import os import shutil import signal diff --git a/tests/test_utils.py b/tests/test_utils.py index 164ae08a9..e0a8e3dc0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ """ Tests for builders """ + from datetime import datetime from time import sleep diff --git a/tests/test_validator.py b/tests/test_validator.py index 83d4734bc..980cf48dc 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,6 +1,7 @@ """ Tests the validators """ + import pytest from monty.json import MSONable