From 5f83803041aeadb8c483eab8b8ce7fd8c2042f2c Mon Sep 17 00:00:00 2001 From: jpprins1 Date: Fri, 31 Jul 2020 13:12:28 +0200 Subject: [PATCH 1/3] Numpy generic serialization --- HISTORY.rst | 5 +++- asyncio_rpc/serialization/msgpack.py | 44 +++++++++++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 8387e70..1d2e128 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,7 +1,10 @@ 0.1.8 (unreleased) ------------------ -- Nothing changed yet. +- Dropped Numpy save/load file serialization/deserialization for + a more generic approach that uses dictionaries instead. This + makes it easier to deserialize the Numpy arrays in other + programming languages. 0.1.7 (2020-01-10) diff --git a/asyncio_rpc/serialization/msgpack.py b/asyncio_rpc/serialization/msgpack.py index 9434b8e..57cfe84 100644 --- a/asyncio_rpc/serialization/msgpack.py +++ b/asyncio_rpc/serialization/msgpack.py @@ -20,6 +20,10 @@ 'serializables': {}} +class DtypeNotSupported(Exception): + pass + + def register(obj_def): """ Register dataclasses or custom handlers in the registry. @@ -66,27 +70,45 @@ def unpackb(cls, data: bytes) -> Any: class NumpyArrayHandler(AbstractHandler): """ - Use np.save and np.load to serialize/deserialize + Use dictionaries to serialize/deserialize numpy array's. + """ ext_type = 1 obj_type = np.ndarray - # Note: - # More generic approach, but a bit slower than - # packing it as a list/tuple with (dtype, shape, bytes) + # Note: numpy save/load file approach + # is a bit harder with other programming + # languages. So use dictionaries instead. 
+ # + # Note2: Currently you cannot pack/unpack + # arrays with dtype=object @classmethod def packb(cls, array: np.ndarray) -> bytes: - buf = BytesIO() - np.save(buf, array) - buf.seek(0) - return buf.read() + if str(array.dtype) == 'object': + raise DtypeNotSupported( + "Numpy dtype: %s is not supported" % array.dtype) + + return dumpb({ + 'shape': array.shape, + 'dtype': str(array.dtype), + 'fortran_order': np.isfortran(array), + 'data': array.tobytes() + }, do_compress=False) @classmethod def unpackb(cls, data: bytes) -> np.ndarray: - buf = BytesIO(data) - buf.seek(0) - return np.load(buf) + data = loadb(data, do_decompress=False) + if data['dtype'] == 'object': + raise DtypeNotSupported( + "Numpy dtype: %s is not supported" % data['dtype']) + res = np.frombuffer( + data['data'], dtype=data['dtype']).reshape( + data['shape'] + ) + if data['fortran_order']: + res = np.asfortranarray(res) + return res class NumpyStructuredArrayHandler(NumpyArrayHandler): From f3fe225227d38e8ea3274f6bab7d1bdca4785fe8 Mon Sep 17 00:00:00 2001 From: jpprins1 Date: Fri, 31 Jul 2020 13:16:31 +0200 Subject: [PATCH 2/3] removed bytesio import --- asyncio_rpc/serialization/msgpack.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asyncio_rpc/serialization/msgpack.py b/asyncio_rpc/serialization/msgpack.py index 57cfe84..f90e8c0 100644 --- a/asyncio_rpc/serialization/msgpack.py +++ b/asyncio_rpc/serialization/msgpack.py @@ -2,7 +2,6 @@ import msgpack import numpy as np from abc import ABC, abstractmethod -from io import BytesIO from datetime import datetime from lz4.frame import compress as lz4_compress, decompress as lz4_decompress from typing import Any From aa428353f046aaf745c8e009b860390b1a19c5d1 Mon Sep 17 00:00:00 2001 From: jpprins1 Date: Fri, 5 Feb 2021 09:48:26 +0100 Subject: [PATCH 3/3] WIP --- Dockerfile | 16 +++++++-- asyncio_rpc/serialization/msgpack.py | 54 ++++++++++++++++++++++++++-- requirements_base.txt | 8 ++--- 3 files changed, 69 insertions(+), 9 deletions(-) diff 
--git a/Dockerfile b/Dockerfile index fa40a70..074a72f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,21 @@ -FROM python:3.7 +FROM python:3.8.6-slim-buster LABEL maintainer='jelle.prins ' -LABEL py_version='3.7' +LABEL py_version='3.8.6' # Change the date to force rebuilding the whole image. -ENV REFRESHED_AT 2019-02-25 +ENV REFRESHED_AT 2020-01-27 WORKDIR /code COPY requirements_base.txt requirements_dev.txt /code/ + +#RUN apt-get update && apt-get install -y --no-install-recommends \ +# build-essential \ +# && pip3 install -r requirements_dev.txt\ +# && apt-get remove -y\ +# build-essential \ +# && rm -rf /root/.cache/pip \ +# && apt-get autoremove -y\ +# && rm -rf /var/lib/apt/lists/* + RUN pip install --no-cache-dir -r requirements_dev.txt diff --git a/asyncio_rpc/serialization/msgpack.py b/asyncio_rpc/serialization/msgpack.py index f90e8c0..657b194 100644 --- a/asyncio_rpc/serialization/msgpack.py +++ b/asyncio_rpc/serialization/msgpack.py @@ -1,4 +1,5 @@ import dataclasses +from pydantic import BaseModel import msgpack import numpy as np from abc import ABC, abstractmethod @@ -16,7 +17,8 @@ # this on the module... 
REGISTRY = {'obj_types': {}, 'ext_types': {}, - 'serializables': {}} + 'serializables': {}, + 'pydantic_serializables': {}} class DtypeNotSupported(Exception): @@ -29,7 +31,23 @@ def register(obj_def): For example obj_def and required methods, see NumpyArray below """ - if dataclasses.is_dataclass(obj_def): + mro = [] + if hasattr(obj_def, 'mro'): + try: + mro = obj_def.mro() + except ValueError: + pass + + if BaseModel in mro: + # Pydantic support + class_name = obj_def.__name__ + REGISTRY['pydantic_serializables'][class_name] = obj_def + REGISTRY['obj_types'][obj_def] = PydanticHandler + # Register the PydanticHandler if not done already + if PydanticHandler.ext_type not in REGISTRY['ext_types']: + REGISTRY['ext_types'][ + PydanticHandler.ext_type] = PydanticHandler + elif dataclasses.is_dataclass(obj_def): # Handle dataclasses, every dataclass needs to be registered # via register. class_name = obj_def.__name__ @@ -163,6 +181,37 @@ def unpackb(cls, data): return klass(**data) +class PydanticHandler: + """ + Serialize pydantic models by serializing the dict + of pydantic models. 
+ """ + ext_type = 6 + + @classmethod + def packb(cls, obj) -> bytes: + dataclass_name = obj.__class__.__name__ + if isinstance(dataclass_name, str): + dataclass_name = dataclass_name + + # Recursively process the fields of the pydantic model, + # serialize as tuple(class name, obj.dict()) + return dumpb( + (dataclass_name, obj.dict()), + do_compress=False) + + @classmethod + def unpackb(cls, data): + # Recursively process the contents of the pydantic model + classname, data = loadb( + data, do_decompress=False, raw=False) + # Look up the registered class; unregistered names fail the assert + assert classname in REGISTRY['pydantic_serializables'], \ + f'class {classname} not yet registered' + klass = REGISTRY['pydantic_serializables'][classname] + return klass(**data) + + class SliceHandler: """ Serialize slices @@ -244,5 +293,6 @@ def loadb(packed: bytes, do_decompress=True, decompress_func=lz4_decompress, decompress_func = do_nothing return msgpack.unpackb( decompress_func(packed), ext_hook=ext_hook, + strict_map_key=False, max_ext_len=MAX_EXT_LEN, max_str_len=MAX_STR_LEN, raw=raw) diff --git a/requirements_base.txt b/requirements_base.txt index cd8da1f..14361fa 100644 --- a/requirements_base.txt +++ b/requirements_base.txt @@ -1,4 +1,4 @@ -msgpack==0.6.0 -lz4==2.1.6 -aioredis==1.2.0 -numpy==1.16.1 +msgpack +lz4 +aioredis +numpy