# Source code for tensorpack.utils.serialize

# -*- coding: utf-8 -*-
# File: serialize.py

import os

import pickle
from multiprocessing.reduction import ForkingPickler
import msgpack
import msgpack_numpy

# Apply msgpack_numpy's patch to msgpack at import time, before any serializer
# below is used (presumably this is what lets msgpack handle numpy data --
# confirm against the msgpack_numpy documentation).
msgpack_numpy.patch()
# Require msgpack 0.5.2 or newer.
# NOTE(review): being an `assert`, this check is skipped when Python runs with -O.
assert msgpack.version >= (0, 5, 2)

# Names exported by a star-import of this module.
__all__ = ['loads', 'dumps']


# Size limit (10**9, approximately 1G) passed as every max_*_len argument of
# msgpack.loads in MsgpackSerializer.loads.
MAX_MSGPACK_LEN = 1000000000


class MsgpackSerializer(object):
    """Serializer backed by the module-level ``msgpack`` import."""

    @staticmethod
    def dumps(obj):
        """
        Pack an object with msgpack.

        Args:
            obj: the object to serialize.

        Returns:
            Implementation-dependent bytes-like object.
        """
        packed = msgpack.dumps(obj, use_bin_type=True)
        return packed

    @staticmethod
    def loads(buf):
        """
        Unpack a buffer produced by `dumps`.

        Args:
            buf: the output of `dumps`.

        Returns:
            Whatever ``msgpack.loads`` decodes from ``buf``.
        """
        # Since msgpack 0.6 the default max size is 1MB; raise every one of
        # these limits to MAX_MSGPACK_LEN (approximately 1G).
        size_limits = {
            'max_bin_len': MAX_MSGPACK_LEN,
            'max_array_len': MAX_MSGPACK_LEN,
            'max_map_len': MAX_MSGPACK_LEN,
            'max_str_len': MAX_MSGPACK_LEN,
        }
        return msgpack.loads(buf, raw=False, **size_limits)


class PyarrowSerializer(object):
    """
    Serializer backed by pyarrow.

    pyarrow is imported inside each method, so it is only needed when one of
    these methods is actually called.

    NOTE(review): `pyarrow.serialize` / `pyarrow.deserialize` are reported as
    deprecated in recent pyarrow releases -- confirm that the installed
    version still provides them.
    """

    @staticmethod
    def dumps(obj):
        """
        Serialize an object into a pyarrow buffer.

        Args:
            obj: the object to serialize.

        Returns:
            Implementation-dependent bytes-like object.
            May not be compatible across different versions of pyarrow.
        """
        import pyarrow as pa
        serialized = pa.serialize(obj)
        return serialized.to_buffer()

    @staticmethod
    def dumps_bytes(obj):
        """
        Serialize an object with `dumps` and convert the result to bytes.

        Args:
            obj: the object to serialize.

        Returns:
            bytes
        """
        buffer = PyarrowSerializer.dumps(obj)
        return buffer.to_pybytes()

    @staticmethod
    def loads(buf):
        """
        Deserialize data written by this class.

        Args:
            buf: the output of `dumps` or `dumps_bytes`.

        Returns:
            The object returned by ``pyarrow.deserialize``.
        """
        import pyarrow as pa
        restored = pa.deserialize(buf)
        return restored


class PickleSerializer(object):
    """Serializer backed by the standard-library ``pickle`` module."""

    @staticmethod
    def dumps(obj):
        """
        Pickle an object with the highest protocol available.

        Args:
            obj: the object to serialize.

        Returns:
            bytes
        """
        # HIGHEST_PROTOCOL is what a negative protocol number selects.
        payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
        return payload

    @staticmethod
    def loads(buf):
        """
        Unpickle a buffer.

        Unpickling can execute arbitrary code, so only pass buffers that
        come from a trusted source.

        Args:
            buf: bytes, e.g. the output of `dumps`.

        Returns:
            The unpickled object.
        """
        restored = pickle.loads(buf)
        return restored


# Pick the default serializer, i.e. the one whose `dumps` produces bytes.
# The choice is read from the TENSORPACK_SERIALIZE environment variable and
# defaults to 'pickle'; any value other than 'pyarrow' or 'pickle' selects
# msgpack.
_DEFAULT_S = os.environ.get('TENSORPACK_SERIALIZE', 'pickle')

dumps, loads = {
    "pyarrow": (PyarrowSerializer.dumps_bytes, PyarrowSerializer.loads),
    "pickle": (PickleSerializer.dumps, PickleSerializer.loads),
}.get(_DEFAULT_S, (MsgpackSerializer.dumps, MsgpackSerializer.loads))

# Pick the serializer used to pass data between a pair of peers, where the
# deserialization is known to happen only once. The choice is read from the
# TENSORPACK_ONCE_SERIALIZE environment variable and defaults to 'pickle';
# any value other than 'pickle' or 'pyarrow' selects msgpack.
_DEFAULT_S = os.environ.get('TENSORPACK_ONCE_SERIALIZE', 'pickle')

if _DEFAULT_S == "pickle":
    dumps_once, loads_once = ForkingPickler.dumps, ForkingPickler.loads
elif _DEFAULT_S == "pyarrow":
    dumps_once, loads_once = PyarrowSerializer.dumps, PyarrowSerializer.loads
else:
    dumps_once, loads_once = MsgpackSerializer.dumps, MsgpackSerializer.loads