- """
- Msgpack serializer support for reading and writing pandas data structures
- to disk
- portions of msgpack_numpy package, by Lev Givon were incorporated
- into this module (and tests_packers.py)
- License
- =======
- Copyright (c) 2013, Lev Givon.
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials provided
- with the distribution.
- * Neither the name of Lev Givon nor the names of any
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- """
from datetime import date, datetime, timedelta
import os
from textwrap import dedent
import warnings

from dateutil.parser import parse
import numpy as np

import pandas.compat as compat
from pandas.compat import u, u_safe

from pandas.errors import PerformanceWarning
from pandas.util._move import (
    BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer)

from pandas.core.dtypes.common import (
    is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype,
    needs_i8_conversion, pandas_dtype)

from pandas import (  # noqa:F401
    Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
    Index, Int64Index, Interval, IntervalIndex, MultiIndex, NaT, Panel, Period,
    PeriodIndex, RangeIndex, Series, TimedeltaIndex, Timestamp)
from pandas.core import internals
from pandas.core.arrays import DatetimeArray, IntervalArray, PeriodArray
from pandas.core.arrays.sparse import BlockIndex, IntIndex
from pandas.core.generic import NDFrame
from pandas.core.internals import BlockManager, _safe_reshape, make_block
from pandas.core.sparse.api import SparseDataFrame, SparseSeries

from pandas.io.common import _stringify_path, get_filepath_or_buffer
from pandas.io.msgpack import ExtType, Packer as _Packer, Unpacker as _Unpacker

# check which compression libs we have installed
try:
    import zlib

    def _check_zlib():
        pass
except ImportError:
    def _check_zlib():
        raise ImportError('zlib is not installed')

_check_zlib.__doc__ = dedent(
    """\
    Check if zlib is installed.

    Raises
    ------
    ImportError
        Raised when zlib is not installed.
    """,
)

try:
    import blosc

    def _check_blosc():
        pass
except ImportError:
    def _check_blosc():
        raise ImportError('blosc is not installed')

_check_blosc.__doc__ = dedent(
    """\
    Check if blosc is installed.

    Raises
    ------
    ImportError
        Raised when blosc is not installed.
    """,
)

# until we can pass this into our conversion functions,
# this is pretty hacky
compressor = None

def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
        if None, return the packed bytes
    args : an object or objects to serialize
    encoding : encoding for unicode objects
    append : boolean, whether to append to an existing msgpack
        (default is False)
    compress : type of compressor (zlib or blosc), default None (no
        compression)
    """
    global compressor
    compressor = kwargs.pop('compress', None)
    if compressor:
        compressor = u(compressor)
    append = kwargs.pop('append', None)
    if append:
        mode = 'a+b'
    else:
        mode = 'wb'

    def writer(fh):
        for a in args:
            fh.write(pack(a, **kwargs))

    path_or_buf = _stringify_path(path_or_buf)
    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf)

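# A minimal round-trip sketch ('frame.msg' is a hypothetical file name);
# illustrative only, not executed at import time:
#
#   >>> df = DataFrame({'a': [1, 2, 3]})
#   >>> to_msgpack('frame.msg', df)             # write one object to disk
#   >>> raw = to_msgpack(None, df)              # or return the packed bytes
#   >>> to_msgpack('frame.msg', df, df, compress='zlib')  # several, compressed
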
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
    """
    Load a msgpack pandas object from the specified file path.

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, bytes, or buffer-like
    encoding : Encoding for decoding msgpack str type
    iterator : boolean, if True, return an iterator to the unpacker
        (default is False)

    Returns
    -------
    obj : same type as object stored in file
    """
    path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def read(fh):
        unpacked_obj = list(unpack(fh, encoding=encoding, **kwargs))
        if len(unpacked_obj) == 1:
            return unpacked_obj[0]

        if should_close:
            try:
                path_or_buf.close()
            except IOError:
                pass
        return unpacked_obj

    # see if we have an actual file
    if isinstance(path_or_buf, compat.string_types):
        try:
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False

        if exists:
            with open(path_or_buf, 'rb') as fh:
                return read(fh)

    if isinstance(path_or_buf, compat.binary_type):
        # treat as a binary-like
        fh = None
        try:
            # We can't distinguish between a path and a buffer of bytes in
            # Python 2 so instead assume the first byte of a valid path is
            # less than 0x80.
            if compat.PY3 or ord(path_or_buf[0]) >= 0x80:
                fh = compat.BytesIO(path_or_buf)
                return read(fh)
        finally:
            if fh is not None:
                fh.close()
    elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        # treat as a buffer like
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')

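# Reading back the hypothetical file written above; several stored objects
# come back as a list, a single object comes back unwrapped (illustrative):
#
#   >>> df = read_msgpack('frame.msg')
#   >>> same_df = read_msgpack(raw)             # bytes input also works
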
dtype_dict = {21: np.dtype('M8[ns]'),
              u('datetime64[ns]'): np.dtype('M8[ns]'),
              u('datetime64[us]'): np.dtype('M8[us]'),
              22: np.dtype('m8[ns]'),
              u('timedelta64[ns]'): np.dtype('m8[ns]'),
              u('timedelta64[us]'): np.dtype('m8[us]'),

              # this is platform int, which we need to remap to np.int64
              # for compat on windows platforms
              7: np.dtype('int64'),
              'category': 'category'
              }


def dtype_for(t):
    """ return my dtype mapping, whether number or name """
    if t in dtype_dict:
        return dtype_dict[t]
    return np.typeDict.get(t, t)

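# Both the numeric wire codes and the dtype names resolve through the same
# table; unknown names fall back to ``np.typeDict`` (illustrative):
#
#   >>> dtype_for(21)
#   dtype('<M8[ns]')
#   >>> dtype_for('float64')
#   <class 'numpy.float64'>
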
c2f_dict = {'complex': np.float64,
            'complex128': np.float64,
            'complex64': np.float32}

# windows (32 bit) compat
if hasattr(np, 'float128'):
    c2f_dict['complex256'] = np.float128


def c2f(r, i, ctype_name):
    """
    Convert strings to complex number instance with specified numpy type.
    """

    ftype = c2f_dict[ctype_name]
    return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i))

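# Complex scalars are stored as two strings (real and imaginary parts) and
# rebuilt here, e.g.:
#
#   >>> c2f('1.0', '2.0', 'complex128')
#   (1+2j)
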
def convert(values):
    """ convert the numpy values to a list """

    dtype = values.dtype

    if is_categorical_dtype(values):
        return values

    elif is_object_dtype(dtype):
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        values = values.view('i8')
    v = values.ravel()

    if compressor == 'zlib':
        _check_zlib()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, zlib.compress(v))

    elif compressor == 'blosc':
        _check_blosc()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, blosc.compress(v, typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, v.tostring())

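# With no module-level ``compressor`` set, a numeric array ships as its raw
# little-endian buffer wrapped in msgpack ExtType code 0 (illustrative):
#
#   >>> convert(np.array([1, 2], dtype='int64'))
#   ExtType(code=0, data=b'\x01\x00\x00\x00\x00\x00\x00\x00\x02...')
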
def unconvert(values, dtype, compress=None):
    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the bytes into a numpy array.
    buf = np.frombuffer(values, dtype=dtype)
    buf = buf.copy()  # required to not mutate the original data
    buf.flags.writeable = True

    return buf

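# ``unconvert`` is the inverse of ``convert`` for array payloads; a simple
# uncompressed round-trip (illustrative):
#
#   >>> arr = np.arange(3, dtype='int64')
#   >>> np.array_equal(unconvert(convert(arr), arr.dtype), arr)
#   True
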
def encode(obj):
    """
    Data encoder
    """
    tobj = type(obj)
    if isinstance(obj, Index):
        if isinstance(obj, RangeIndex):
            return {u'typ': u'range_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'start': getattr(obj, '_start', None),
                    u'stop': getattr(obj, '_stop', None),
                    u'step': getattr(obj, '_step', None)}
        elif isinstance(obj, PeriodIndex):
            return {u'typ': u'period_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'compress': compressor}
        elif isinstance(obj, DatetimeIndex):
            tz = getattr(obj, 'tz', None)

            # store tz info and data as UTC
            if tz is not None:
                tz = u(tz.zone)
                obj = obj.tz_convert('UTC')
            return {u'typ': u'datetime_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'tz': tz,
                    u'compress': compressor}
        elif isinstance(obj, (IntervalIndex, IntervalArray)):
            if isinstance(obj, IntervalIndex):
                typ = u'interval_index'
            else:
                typ = u'interval_array'
            return {u'typ': typ,
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'left': getattr(obj, 'left', None),
                    u'right': getattr(obj, 'right', None),
                    u'closed': getattr(obj, 'closed', None)}
        elif isinstance(obj, MultiIndex):
            return {u'typ': u'multi_index',
                    u'klass': u(obj.__class__.__name__),
                    u'names': getattr(obj, 'names', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
        else:
            return {u'typ': u'index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}

    elif isinstance(obj, Categorical):
        return {u'typ': u'category',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'codes': obj.codes,
                u'categories': obj.categories,
                u'ordered': obj.ordered,
                u'compress': compressor}

    elif isinstance(obj, Series):
        if isinstance(obj, SparseSeries):
            raise NotImplementedError(
                'msgpack sparse series is not implemented'
            )
            # d = {'typ': 'sparse_series',
            #      'klass': obj.__class__.__name__,
            #      'dtype': obj.dtype.name,
            #      'index': obj.index,
            #      'sp_index': obj.sp_index,
            #      'sp_values': convert(obj.sp_values),
            #      'compress': compressor}
            # for f in ['name', 'fill_value', 'kind']:
            #     d[f] = getattr(obj, f, None)
            # return d
        else:
            return {u'typ': u'series',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'index': obj.index,
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
    elif issubclass(tobj, NDFrame):
        if isinstance(obj, SparseDataFrame):
            raise NotImplementedError(
                'msgpack sparse frame is not implemented'
            )
            # d = {'typ': 'sparse_dataframe',
            #      'klass': obj.__class__.__name__,
            #      'columns': obj.columns}
            # for f in ['default_fill_value', 'default_kind']:
            #     d[f] = getattr(obj, f, None)
            # d['data'] = dict([(name, ss)
            #                   for name, ss in compat.iteritems(obj)])
            # return d
        else:
            data = obj._data
            if not data.is_consolidated():
                data = data.consolidate()

            # the block manager
            return {u'typ': u'block_manager',
                    u'klass': u(obj.__class__.__name__),
                    u'axes': data.axes,
                    u'blocks': [{u'locs': b.mgr_locs.as_array,
                                 u'values': convert(b.values),
                                 u'shape': b.values.shape,
                                 u'dtype': u(b.dtype.name),
                                 u'klass': u(b.__class__.__name__),
                                 u'compress': compressor} for b in data.blocks]
                    }
    elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
                          np.timedelta64)) or obj is NaT:
        if isinstance(obj, Timestamp):
            tz = obj.tzinfo
            if tz is not None:
                tz = u(tz.zone)
            freq = obj.freq
            if freq is not None:
                freq = u(freq.freqstr)
            return {u'typ': u'timestamp',
                    u'value': obj.value,
                    u'freq': freq,
                    u'tz': tz}
        if obj is NaT:
            return {u'typ': u'nat'}
        elif isinstance(obj, np.timedelta64):
            return {u'typ': u'timedelta64',
                    u'data': obj.view('i8')}
        elif isinstance(obj, timedelta):
            return {u'typ': u'timedelta',
                    u'data': (obj.days, obj.seconds, obj.microseconds)}
        elif isinstance(obj, np.datetime64):
            return {u'typ': u'datetime64',
                    u'data': u(str(obj))}
        elif isinstance(obj, datetime):
            return {u'typ': u'datetime',
                    u'data': u(obj.isoformat())}
        elif isinstance(obj, date):
            return {u'typ': u'date',
                    u'data': u(obj.isoformat())}
        raise Exception(
            "cannot encode this datetimelike object: {obj}".format(obj=obj))
    elif isinstance(obj, Period):
        return {u'typ': u'period',
                u'ordinal': obj.ordinal,
                u'freq': u_safe(obj.freqstr)}
    elif isinstance(obj, Interval):
        return {u'typ': u'interval',
                u'left': obj.left,
                u'right': obj.right,
                u'closed': obj.closed}
    elif isinstance(obj, BlockIndex):
        return {u'typ': u'block_index',
                u'klass': u(obj.__class__.__name__),
                u'blocs': obj.blocs,
                u'blengths': obj.blengths,
                u'length': obj.length}
    elif isinstance(obj, IntIndex):
        return {u'typ': u'int_index',
                u'klass': u(obj.__class__.__name__),
                u'indices': obj.indices,
                u'length': obj.length}
    elif isinstance(obj, np.ndarray):
        return {u'typ': u'ndarray',
                u'shape': obj.shape,
                u'ndim': obj.ndim,
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj),
                u'compress': compressor}
    elif isinstance(obj, np.number):
        if np.iscomplexobj(obj):
            return {u'typ': u'np_scalar',
                    u'sub_typ': u'np_complex',
                    u'dtype': u(obj.dtype.name),
                    u'real': u(obj.real.__repr__()),
                    u'imag': u(obj.imag.__repr__())}
        else:
            return {u'typ': u'np_scalar',
                    u'dtype': u(obj.dtype.name),
                    u'data': u(obj.__repr__())}
    elif isinstance(obj, complex):
        return {u'typ': u'np_complex',
                u'real': u(obj.real.__repr__()),
                u'imag': u(obj.imag.__repr__())}

    return obj

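# ``encode`` reduces pandas/numpy objects to plain, ``typ``-tagged dicts that
# msgpack can serialize; anything unrecognized passes through unchanged.
# For example (illustrative output):
#
#   >>> encode(Period('2013Q1'))
#   {u'typ': u'period', u'ordinal': 172, u'freq': u'Q-DEC'}
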
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """
    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        return Index(data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return RangeIndex(obj[u'start'],
                          obj[u'stop'],
                          obj[u'step'],
                          name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return MultiIndex.from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        freq = d.pop('freq', None)
        return PeriodIndex(PeriodArray(data, freq), **d)

    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        result = DatetimeIndex(data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ in (u'interval_index', 'interval_array'):
        return globals()[obj[u'klass']].from_arrays(obj[u'left'],
                                                    obj[u'right'],
                                                    obj[u'closed'],
                                                    name=obj[u'name'])
    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'])

    elif typ == u'interval':
        return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj[u'index']
        result = Series(unconvert(obj[u'data'], dtype, obj[u'compress']),
                        index=index,
                        dtype=pd_dtype,
                        name=obj[u'name'])
        return result
    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])

            if is_datetime64tz_dtype(b[u'dtype']):
                assert isinstance(values, np.ndarray), type(values)
                assert values.dtype == 'M8[ns]', values.dtype
                values = DatetimeArray(values, dtype=b[u'dtype'])

            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #     dtype = dtype_for(obj['dtype'])
    #     return SparseSeries(
    #         unconvert(obj['sp_values'], dtype, obj['compress']),
    #         sparse_index=obj['sp_index'], index=obj['index'],
    #         fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #     return SparseDataFrame(
    #         obj['data'], columns=obj['columns'],
    #         default_fill_value=obj['default_fill_value'],
    #         default_kind=obj['default_kind']
    #     )
    # elif typ == 'sparse_panel':
    #     return SparsePanel(
    #         obj['data'], items=obj['items'],
    #         default_fill_value=obj['default_fill_value'],
    #         default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except (ValueError, TypeError):
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj

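# ``decode`` dispatches on the ``typ`` tag written by ``encode`` and rebuilds
# the original object, so the two are inverses (illustrative):
#
#   >>> decode(encode(Timestamp('2013-01-01')))
#   Timestamp('2013-01-01 00:00:00')
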
def pack(o, default=encode,
         encoding='utf-8', unicode_errors='strict', use_single_float=False,
         autoreset=1, use_bin_type=1):
    """
    Pack an object and return the packed bytes.
    """

    return Packer(default=default, encoding=encoding,
                  unicode_errors=unicode_errors,
                  use_single_float=use_single_float,
                  autoreset=autoreset,
                  use_bin_type=use_bin_type).pack(o)


def unpack(packed, object_hook=decode,
           list_hook=None, use_list=False, encoding='utf-8',
           unicode_errors='strict', object_pairs_hook=None,
           max_buffer_size=0, ext_hook=ExtType):
    """
    Unpack a packed object, return an iterator
    Note: packed lists will be returned as tuples
    """

    return Unpacker(packed, object_hook=object_hook,
                    list_hook=list_hook,
                    use_list=use_list, encoding=encoding,
                    unicode_errors=unicode_errors,
                    object_pairs_hook=object_pairs_hook,
                    max_buffer_size=max_buffer_size,
                    ext_hook=ext_hook)

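# A byte-level round-trip; ``unpack`` yields each object packed into the
# stream (illustrative):
#
#   >>> packed = pack(Period('2013Q1'))
#   >>> list(unpack(compat.BytesIO(packed)))
#   [Period('2013Q1', 'Q-DEC')]
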
class Packer(_Packer):

    def __init__(self, default=encode,
                 encoding='utf-8',
                 unicode_errors='strict',
                 use_single_float=False,
                 autoreset=1,
                 use_bin_type=1):
        super(Packer, self).__init__(default=default,
                                     encoding=encoding,
                                     unicode_errors=unicode_errors,
                                     use_single_float=use_single_float,
                                     autoreset=autoreset,
                                     use_bin_type=use_bin_type)


class Unpacker(_Unpacker):

    def __init__(self, file_like=None, read_size=0, use_list=False,
                 object_hook=decode,
                 object_pairs_hook=None, list_hook=None, encoding='utf-8',
                 unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType):
        super(Unpacker, self).__init__(file_like=file_like,
                                       read_size=read_size,
                                       use_list=use_list,
                                       object_hook=object_hook,
                                       object_pairs_hook=object_pairs_hook,
                                       list_hook=list_hook,
                                       encoding=encoding,
                                       unicode_errors=unicode_errors,
                                       max_buffer_size=max_buffer_size,
                                       ext_hook=ext_hook)

class Iterator(object):

    """ manage the unpacking iteration,
        close the file on completion """

    def __init__(self, path, **kwargs):
        self.path = path
        self.kwargs = kwargs

    def __iter__(self):

        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    fh = compat.BytesIO(self.path)
                else:
                    # a file-like
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            if needs_closing:
                fh.close()
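
# Lazy iteration over a multi-object msgpack via the public entry point
# (same hypothetical file name as above; ``process`` is a placeholder):
#
#   >>> for obj in read_msgpack('frame.msg', iterator=True):
#   ...     process(obj)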