Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added sanitize_np function, which is faster than sanitize_doc #71

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
47 changes: 47 additions & 0 deletions event_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import uuid
import warnings
from ._version import get_versions
import numpy as np
import collections

# Names exported by ``from <this module> import *``.
__all__ = ['DocumentNames', 'schemas', 'compose_run']

Expand Down Expand Up @@ -896,6 +898,51 @@ def verify_filled(event_page):
f"`event_model.Filler`.")


def sanitize_np(doc):
    '''Return a copy with any numpy objects converted to built-in Python types.

    This is a faster version of sanitize_doc which only converts numpy
    objects. It is useful for sanitizing documents prior to sending them to
    any consumer that does not recognize numpy types, such as a MongoDB
    database or a JSON encoder.

    The input is never modified; a new structure is built and returned.
    Mappings (anything with an ``items`` method) come back as plain dicts,
    and every other non-string iterable (tuples included) comes back as a
    list.

    Parameters
    ----------
    doc : dict
        The event-model document to be sanitized. Nested values and bare
        (non-dict) values are accepted as well.

    Returns
    -------
    sanitized_doc : event-model document
        The event-model document with numpy objects converted to built-in
        Python types.
    '''
    # numpy objects are checked first: arrays are themselves iterable, so
    # letting them reach the generic branch below would walk them element
    # by element in Python (and fail outright on 0-d arrays). ``tolist``
    # converts scalars and arrays of any rank in one call.
    if isinstance(doc, (np.generic, np.ndarray)):
        return doc.tolist()
    if hasattr(doc, 'items'):
        return {key: sanitize_np(value) for key, value in doc.items()}
    # str/bytes are iterable but must be kept whole, not split into items.
    if (isinstance(doc, collections.abc.Iterable)
            and not isinstance(doc, (str, bytes))):
        return [sanitize_np(value) for value in doc]
    return doc


def sanitize_item(val):
    '''Convert a single numpy object into its built-in Python equivalent.

    Parameters
    ----------
    val : object
        The value to convert.

    Returns
    -------
    object
        ``val.item()`` for numpy scalars, ``val.tolist()`` for other numpy
        objects, and ``val`` itself for anything that is not a numpy object.
    '''
    # Anything that is not a numpy scalar or array passes through untouched.
    if not isinstance(val, (np.generic, np.ndarray)):
        return val
    return val.item() if np.isscalar(val) else val.tolist()


def sanitize_doc(doc):
'''Return a copy with any numpy objects converted to built-in Python types.

Expand Down