  1. """ pickle compat """
  2. import warnings
  3. import numpy as np
  4. from numpy.lib.format import read_array, write_array
  5. from pandas.compat import PY3, BytesIO, cPickle as pkl, pickle_compat as pc
  6. from pandas.io.common import _get_handle, _stringify_path
  7. def to_pickle(obj, path, compression='infer', protocol=pkl.HIGHEST_PROTOCOL):
  8. """
  9. Pickle (serialize) object to file.
  10. Parameters
  11. ----------
  12. obj : any object
  13. Any python object.
  14. path : str
  15. File path where the pickled object will be stored.
  16. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
  17. A string representing the compression to use in the output file. By
  18. default, infers from the file extension in specified path.
  19. .. versionadded:: 0.20.0
  20. protocol : int
  21. Int which indicates which protocol should be used by the pickler,
  22. default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
  23. values for this parameter depend on the version of Python. For Python
  24. 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
  25. For Python >= 3.4, 4 is a valid value. A negative value for the
  26. protocol parameter is equivalent to setting its value to
  27. HIGHEST_PROTOCOL.
  28. .. [1] https://docs.python.org/3/library/pickle.html
  29. .. versionadded:: 0.21.0
  30. See Also
  31. --------
  32. read_pickle : Load pickled pandas object (or any object) from file.
  33. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  34. DataFrame.to_sql : Write DataFrame to a SQL database.
  35. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  36. Examples
  37. --------
  38. >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
  39. >>> original_df
  40. foo bar
  41. 0 0 5
  42. 1 1 6
  43. 2 2 7
  44. 3 3 8
  45. 4 4 9
  46. >>> pd.to_pickle(original_df, "./dummy.pkl")
  47. >>> unpickled_df = pd.read_pickle("./dummy.pkl")
  48. >>> unpickled_df
  49. foo bar
  50. 0 0 5
  51. 1 1 6
  52. 2 2 7
  53. 3 3 8
  54. 4 4 9
  55. >>> import os
  56. >>> os.remove("./dummy.pkl")
  57. """
  58. path = _stringify_path(path)
  59. f, fh = _get_handle(path, 'wb',
  60. compression=compression,
  61. is_text=False)
  62. if protocol < 0:
  63. protocol = pkl.HIGHEST_PROTOCOL
  64. try:
  65. f.write(pkl.dumps(obj, protocol=protocol))
  66. finally:
  67. for _f in fh:
  68. _f.close()
def read_pickle(path, compression='infer'):
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

        Loading pickled data received from untrusted sources can be
        unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    path : str
        File path where the pickled object will be loaded.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
        For on-the-fly decompression of on-disk data. If 'infer', then use
        gzip, bz2, xz or zip if path ends in '.gz', '.bz2', '.xz',
        or '.zip' respectively, and no decompression otherwise.
        Set to None for no decompression.

        .. versionadded:: 0.20.0

    Returns
    -------
    unpickled : same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Examples
    --------
    >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)})
    >>> original_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")
    >>> unpickled_df
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9

    >>> import os
    >>> os.remove("./dummy.pkl")
    """
    path = _stringify_path(path)

    def read_wrapper(func):
        # Wrapper around ``func`` that handles opening the (possibly
        # compressed) file and closing every handle _get_handle returned,
        # even when ``func`` raises.
        f, fh = _get_handle(path, 'rb',
                            compression=compression,
                            is_text=False)
        try:
            return func(f)
        finally:
            for _f in fh:
                _f.close()

    def try_read(path, encoding=None):
        # Three-stage fallback chain; the order matters:
        #   1. plain (c)pickle -- the fast path for current-format files
        #   2. pc.load with compat=False -- patched/compat unpickler
        #   3. pc.load with compat=True -- handles renamed/moved pandas
        #      classes in old pickles (subclass changes)
        # ``encoding`` is passed through only when not None because py2's
        # pickle.load does not accept the parameter.
        # GH 6899
        try:
            with warnings.catch_warnings(record=True):
                # We want to silence any warnings about, e.g. moved modules.
                warnings.simplefilter("ignore", Warning)
                return read_wrapper(lambda f: pkl.load(f))
        except Exception:  # noqa: E722
            # Broad on purpose: any failure falls through to the compat
            # loaders below.
            # reg/patched pickle
            # compat not used in pandas/compat/pickle_compat.py::load
            # TODO: remove except block OR modify pc.load to use compat
            try:
                return read_wrapper(
                    lambda f: pc.load(f, encoding=encoding, compat=False))
            # compat pickle
            except Exception:  # noqa: E722
                return read_wrapper(
                    lambda f: pc.load(f, encoding=encoding, compat=True))

    try:
        return try_read(path)
    except Exception:  # noqa: E722
        # On Python 3, retry with latin1 so pickles written under Python 2
        # (bytes-based) can still be decoded; on Python 2 just re-raise.
        if PY3:
            return try_read(path, encoding='latin1')
        raise
  158. # compat with sparse pickle / unpickle
  159. def _pickle_array(arr):
  160. arr = arr.view(np.ndarray)
  161. buf = BytesIO()
  162. write_array(buf, arr)
  163. return buf.getvalue()
  164. def _unpickle_array(bytes):
  165. arr = read_array(BytesIO(bytes))
  166. return arr