Skip to content

Commit

Permalink
ENH: add infer_datakeys helper
Browse files Browse the repository at this point in the history
This will extract the shape, (json)dtype, and the detailed numpy dtype
information for a value.
  • Loading branch information
tacaswell committed Sep 10, 2021
1 parent 686faa8 commit c912871
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 1 deletion.
50 changes: 49 additions & 1 deletion event_model/_numpy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json

import typing
import numpy

from ._errors import EventModelValueError


def sanitize_doc(doc):
"""Return a copy with any numpy objects converted to built-in Python types.
Expand Down Expand Up @@ -51,3 +53,49 @@ def default(self, obj):
return obj.item()
return obj.tolist()
return json.JSONEncoder.default(self, obj)


def infer_datakeys(val):
    """
    Infer how Event Model would describe the data type of a value.

    Parameters
    ----------
    val : Any
        The value to describe.  Any iterable other than ``str``, ``bytes``
        or ``dict`` is reported as ``"array"``; everything else is matched
        against the supported scalar types.

    Returns
    -------
    dict
        A mapping with four keys:

        ``"dtype"``
            One of ``"array"``, ``"number"``, ``"string"`` or ``"integer"``.
        ``"dtype_str"``
            ``numpy.dtype.str`` of ``numpy.asanyarray(val)``.
        ``"dtype_descr"``
            ``numpy.dtype.descr`` of ``numpy.asanyarray(val)``.
        ``"shape"``
            Shape of ``numpy.asanyarray(val)`` as a list (``[]`` for
            scalars).

    Raises
    ------
    EventModelValueError
        If ``val`` is neither an accepted iterable nor an instance of one
        of the supported scalar types.
    """
    # Iterable types that must NOT be reported as arrays.
    bad_iterables = (str, bytes, dict)
    # Checked in insertion order; the first matching entry wins.
    _type_map = {
        # numpy.complexfloating is listed explicitly because only
        # numpy.complex128 subclasses the built-in ``complex``; without it
        # scalars such as numpy.complex64 would be rejected.
        "number": (float, numpy.floating, complex, numpy.complexfloating),
        "array": (numpy.ndarray, list, tuple),
        "string": (str,),
        "integer": (int, numpy.integer),
    }

    if isinstance(val, typing.Iterable) and not isinstance(val, bad_iterables):
        dtype = "array"
    else:
        for json_type, py_types in _type_map.items():
            if isinstance(val, py_types):
                dtype = json_type
                break
        else:
            # No entry matched: the value cannot be described.
            raise EventModelValueError(
                "Cannot determine the appropriate bluesky-friendly data type for "
                f"value {val} of Python type {type(val)}. "
                "Supported types include: int, float, str, and iterables such as "
                "list, tuple, np.ndarray, and so on."
            )

    # asanyarray should only make a copy if it _has to_.
    # NOTE(review): if lots of values that are not already numpy arrays flow
    # through here (e.g. huge dask arrays), this conversion may force an
    # expensive computation -- confirm this is acceptable for such inputs.
    arr_val = numpy.asanyarray(val)
    arr_dtype = arr_val.dtype

    return {
        "dtype": dtype,
        "dtype_str": arr_dtype.str,
        "dtype_descr": arr_dtype.descr,
        "shape": list(arr_val.shape),
    }
44 changes: 44 additions & 0 deletions event_model/tests/test_numpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest
import numpy as np


from event_model._numpy import infer_datakeys
from event_model._errors import EventModelValueError


@pytest.mark.parametrize("shape", [[1], [2, 2]])
@pytest.mark.parametrize(
    "dtype", ["i8", "f2", "c16", np.dtype([("a", "i"), ("b", "f")])]
)
def test_infer_dtypes_array(shape, dtype):
    """Arrays of each shape/dtype are described as "array" with matching dtype info."""
    inferred = infer_datakeys(np.ones(shape, dtype=dtype))

    # String specifications are normalized to a numpy dtype for comparison;
    # dtype objects are used as they are.
    expected_dtype = np.dtype(dtype) if isinstance(dtype, str) else dtype
    expected = {
        "dtype": "array",
        "shape": list(shape),
        "dtype_str": expected_dtype.str,
        "dtype_descr": expected_dtype.descr,
    }

    for key, want in expected.items():
        assert inferred[key] == want


@pytest.mark.parametrize("val", [{}, b"bob"])
def test_infer_fail(val):
    """Values that cannot be described make infer_datakeys raise."""
    expect_failure = pytest.raises(EventModelValueError)
    with expect_failure:
        infer_datakeys(val)


@pytest.mark.parametrize(
    "value,dtype",
    [("bob", "string"), (1, "integer"), (1.0, "number"), (1 + 1j, "number")],
)
def test_infer_dtypes_scalar(value, dtype):
    """Scalars get the expected dtype name, an empty shape and numpy's dtype info."""
    inferred = infer_datakeys(value)

    # numpy's own view of the scalar is the reference for the dtype details.
    reference = np.array(value).dtype
    expected = {
        "dtype": dtype,
        "shape": [],
        "dtype_str": reference.str,
        "dtype_descr": reference.descr,
    }

    for key, want in expected.items():
        assert inferred[key] == want

0 comments on commit c912871

Please sign in to comment.