Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DONE] Numpy generic serialization #7

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
FROM python:3.7
FROM python:3.8.6-slim-buster

LABEL maintainer='jelle.prins <[email protected]>'
LABEL py_version='3.7'
LABEL py_version='3.8.6'

# Change the date to force rebuilding the whole image.
ENV REFRESHED_AT 2019-02-25
ENV REFRESHED_AT 2020-01-27

WORKDIR /code
COPY requirements_base.txt requirements_dev.txt /code/

#RUN apt-get update && apt-get install -y --no-install-recommends \
# build-essential \
# && pip3 install -r requirements_dev.txt\
# && apt-get remove -y\
# build-essential \
# && rm -rf /root/.cache/pip \
# && apt-get autoremove -y\
# && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir -r requirements_dev.txt
5 changes: 4 additions & 1 deletion HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
0.1.8 (unreleased)
------------------

- Nothing changed yet.
- Dropped Numpy save/load file serialization/deserialization in
favour of a more generic approach that uses dictionaries instead.
This makes it easier to deserialize the Numpy arrays in other
programming languages.


0.1.7 (2020-01-10)
Expand Down
99 changes: 85 additions & 14 deletions asyncio_rpc/serialization/msgpack.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import dataclasses
from pydantic import BaseModel
import msgpack
import numpy as np
from abc import ABC, abstractmethod
from io import BytesIO
from datetime import datetime
from lz4.frame import compress as lz4_compress, decompress as lz4_decompress
from typing import Any
Expand All @@ -17,7 +17,12 @@
# this on the module...
REGISTRY = {'obj_types': {},
'ext_types': {},
'serializables': {}}
'serializables': {},
'pydantic_serializables': {}}


class DtypeNotSupported(Exception):
    """
    Raised by NumpyArrayHandler.packb/unpackb when the array
    dtype is 'object', which cannot be packed/unpacked.
    """
    pass


def register(obj_def):
Expand All @@ -26,7 +31,23 @@ def register(obj_def):

For example obj_def and required methods, see NumpyArray below
"""
if dataclasses.is_dataclass(obj_def):
mro = []
if hasattr(obj_def, 'mro'):
try:
mro = obj_def.mro()
except ValueError:
pass

if BaseModel in mro:
# Pydantic support
class_name = obj_def.__name__
REGISTRY['pydantic_serializables'][class_name] = obj_def
REGISTRY['obj_types'][obj_def] = PydanticHandler
# Register the PydanticHandler if not done already
if PydanticHandler.ext_type not in REGISTRY['ext_types']:
REGISTRY['ext_types'][
PydanticHandler.ext_type] = PydanticHandler
elif dataclasses.is_dataclass(obj_def):
# Handle dataclasses, every dataclass needs to be registered
# via register.
class_name = obj_def.__name__
Expand Down Expand Up @@ -66,27 +87,45 @@ def unpackb(cls, data: bytes) -> Any:

class NumpyArrayHandler(AbstractHandler):
"""
Use np.save and np.load to serialize/deserialize
Use dictionaries to serialize/deserialize
numpy arrays.

"""
ext_type = 1
obj_type = np.ndarray

# Note:
# More generic approach, but a bit slower than
# packing it as a list/tuple with (dtype, shape, bytes)
# Note: the numpy save/load file approach
# is a bit harder to use from other programming
# languages, so use dictionaries instead.
#
# Note2: Currently you cannot pack/unpack
# arrays with dtype=object
@classmethod
def packb(cls, array: np.ndarray) -> bytes:
buf = BytesIO()
np.save(buf, array)
buf.seek(0)
return buf.read()
if str(array.dtype) == 'object':
raise DtypeNotSupported(
"Numpy dtype: %s is not supported" % array.dtype)

return dumpb({
'shape': array.shape,
'dtype': str(array.dtype),
'fortran_order': np.isfortran(array),
'data': array.tobytes()
}, do_compress=False)

@classmethod
def unpackb(cls, data: bytes) -> np.ndarray:
buf = BytesIO(data)
buf.seek(0)
return np.load(buf)
data = loadb(data, do_decompress=False)
if data['dtype'] == 'object':
raise DtypeNotSupported(
"Numpy dtype: %s is not supported" % data['dtype'])
res = np.frombuffer(
data['data'], dtype=data['dtype']).reshape(
data['shape']
)
if data['fortran_order']:
res = np.asfortranarray(res)
return res


class NumpyStructuredArrayHandler(NumpyArrayHandler):
Expand Down Expand Up @@ -142,6 +181,37 @@ def unpackb(cls, data):
return klass(**data)


class PydanticHandler:
    """
    Serialize pydantic models by serializing the dict
    of pydantic models.

    A model is packed as the tuple (class name, obj.dict()). On
    unpack the class name is looked up in
    REGISTRY['pydantic_serializables'] (filled by register()) and
    the class is called with the dict as keyword arguments.
    """
    ext_type = 6

    @classmethod
    def packb(cls, obj) -> bytes:
        """
        Pack a pydantic model instance.

        :param obj: instance whose class name and dict() output
            are serialized
        :return: uncompressed dumpb() output of the tuple
            (class name, obj.dict())
        """
        class_name = obj.__class__.__name__

        # Nested values are handled by dumpb itself,
        # serialize as tuple(class_name, obj.dict())
        return dumpb(
            (class_name, obj.dict()),
            do_compress=False)

    @classmethod
    def unpackb(cls, data):
        """
        Rebuild a registered pydantic model from packb() output.

        :param data: bytes holding the packed
            tuple(class_name, dict)
        :return: instance of the registered class, created
            with the unpacked dict as keyword arguments
        :raises AssertionError: when the class name has not been
            registered in REGISTRY['pydantic_serializables']
        """
        classname, fields = loadb(
            data, do_decompress=False, raw=False)

        registry = REGISTRY['pydantic_serializables']
        if classname not in registry:
            # Raised explicitly instead of via an assert statement,
            # so the check also runs under `python -O`. Exception
            # type and message are the same as before.
            raise AssertionError(
                f'class {classname} not yet registered')
        return registry[classname](**fields)


class SliceHandler:
"""
Serialize slices
Expand Down Expand Up @@ -223,5 +293,6 @@ def loadb(packed: bytes, do_decompress=True, decompress_func=lz4_decompress,
decompress_func = do_nothing
return msgpack.unpackb(
decompress_func(packed), ext_hook=ext_hook,
strict_map_key=False,
max_ext_len=MAX_EXT_LEN,
max_str_len=MAX_STR_LEN, raw=raw)
8 changes: 4 additions & 4 deletions requirements_base.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
msgpack==0.6.0
lz4==2.1.6
aioredis==1.2.0
numpy==1.16.1
msgpack
lz4
aioredis
numpy