Skip to content

bpo-36785: PEP 574 implementation #7076

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
271 changes: 214 additions & 57 deletions Doc/library/pickle.rst

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
#include "weakrefobject.h"
#include "structseq.h"
#include "namespaceobject.h"
#include "picklebufobject.h"

#include "codecs.h"
#include "pyerrors.h"
Expand Down
31 changes: 31 additions & 0 deletions Include/picklebufobject.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* PickleBuffer object. This is built-in for ease of use from third-party
 * C extensions.
 */

#ifndef Py_PICKLEBUFOBJECT_H
#define Py_PICKLEBUFOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif

/* Everything declared below is hidden when Py_LIMITED_API is defined,
 * i.e. none of it is part of the limited API.
 */
#ifndef Py_LIMITED_API

/* The type object for PickleBuffer instances. */
PyAPI_DATA(PyTypeObject) PyPickleBuffer_Type;

/* Exact type check: true only when the type of `op` is PyPickleBuffer_Type
 * itself.  The comparison is by identity, so an instance of a subclass
 * does not match.
 */
#define PyPickleBuffer_Check(op) (Py_TYPE(op) == &PyPickleBuffer_Type)

/* Create a PickleBuffer redirecting to the given buffer-enabled object */
PyAPI_FUNC(PyObject *) PyPickleBuffer_FromObject(PyObject *);
/* Get the PickleBuffer's underlying view to the original object
 * (NULL if released)
 */
PyAPI_FUNC(const Py_buffer *) PyPickleBuffer_GetBuffer(PyObject *);
/* Release the PickleBuffer. Returns 0 on success, -1 on error. */
PyAPI_FUNC(int) PyPickleBuffer_Release(PyObject *);

#endif /* !Py_LIMITED_API */

#ifdef __cplusplus
}
#endif
#endif /* !Py_PICKLEBUFOBJECT_H */
152 changes: 139 additions & 13 deletions Lib/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@
import codecs
import _compat_pickle

from _pickle import PickleBuffer

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
"Unpickler", "dump", "dumps", "load", "loads"]
"Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)
Expand All @@ -51,10 +53,11 @@
"2.0", # Protocol 2
"3.0", # Protocol 3
"4.0", # Protocol 4
"5.0", # Protocol 5
] # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 4
HIGHEST_PROTOCOL = 5

# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
Expand Down Expand Up @@ -167,6 +170,7 @@ def __init__(self, value):
SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
BINUNICODE8 = b'\x8d' # push very long string
BINBYTES8 = b'\x8e' # push very long bytes string
Expand All @@ -178,6 +182,12 @@ def __init__(self, value):
MEMOIZE = b'\x94' # store top of the stack in memo
FRAME = b'\x95' # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8 = b'\x96' # push bytearray
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
READONLY_BUFFER = b'\x98' # make top of stack readonly

__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


Expand Down Expand Up @@ -251,6 +261,23 @@ def __init__(self, file_read, file_readline, file_tell=None):
self.file_readline = file_readline
self.current_frame = None

def readinto(self, buf):
    """Fill *buf* completely and return the number of bytes stored.

    While a frame is active the data comes from ``self.current_frame``.
    If the frame yields nothing for a non-empty *buf*, the frame is
    dropped and the bytes are taken from ``self.file_read`` instead.
    A frame that yields some, but not all, of the requested bytes raises
    UnpicklingError.  Without an active frame the bytes always come from
    ``self.file_read``.
    """
    frame = self.current_frame
    if not frame:
        # No frame in progress: read straight from the underlying file.
        wanted = len(buf)
        buf[:] = self.file_read(wanted)
        return wanted

    count = frame.readinto(buf)
    wanted = len(buf)
    if count == 0 and wanted != 0:
        # The frame is used up; fall back to the underlying file.
        self.current_frame = None
        buf[:] = self.file_read(wanted)
        return wanted
    if count < wanted:
        raise UnpicklingError(
            "pickle exhausted before end of frame")
    return count

def read(self, n):
if self.current_frame:
data = self.current_frame.read(n)
Expand Down Expand Up @@ -371,7 +398,8 @@ def decode_long(data):

class _Pickler:

def __init__(self, file, protocol=None, *, fix_imports=True):
def __init__(self, file, protocol=None, *, fix_imports=True,
buffer_callback=None):
"""This takes a binary file for writing a pickle data stream.

The optional *protocol* argument tells the pickler to use the
Expand All @@ -393,13 +421,27 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
will try to map the new Python 3 names to the old module names
used in Python 2, so that the pickle data stream is readable
with Python 2.

If *buffer_callback* is None (the default), buffer views are
serialized into *file* as part of the pickle stream.

If *buffer_callback* is not None, then it can be called any number
of times with a buffer view. If the callback returns a false value
(such as None), the given buffer is out-of-band; otherwise the
buffer is serialized in-band, i.e. inside the pickle stream.

It is an error if *buffer_callback* is not None and *protocol*
is None or smaller than 5.
"""
if protocol is None:
protocol = DEFAULT_PROTOCOL
if protocol < 0:
protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
if buffer_callback is not None and protocol < 5:
raise ValueError("buffer_callback needs protocol >= 5")
self._buffer_callback = buffer_callback
try:
self._file_write = file.write
except AttributeError:
Expand Down Expand Up @@ -756,6 +798,46 @@ def save_bytes(self, obj):
self.memoize(obj)
dispatch[bytes] = save_bytes

def save_bytearray(self, obj):
    """Pickle the bytearray *obj*.

    Below protocol 5 the object is written as a reduce call:
    ``bytearray()`` when *obj* is empty, ``bytearray(bytes(obj))``
    otherwise.

    From protocol 5 on, a BYTEARRAY8 opcode is written, followed by the
    length packed as ``"<Q"`` and the raw data.  Payloads of at least
    ``self.framer._FRAME_SIZE_TARGET`` bytes go through
    ``self._write_large_bytes``; smaller ones go through ``self.write``.
    The object is then memoized, like bytes objects are, so a bytearray
    referenced several times is written once and keeps its identity.
    """
    if self.proto < 5:
        args = (bytes(obj),) if obj else ()  # empty bytearray needs no args
        self.save_reduce(bytearray, args, obj=obj)
        return
    n = len(obj)
    header = BYTEARRAY8 + pack("<Q", n)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(header, obj)
    else:
        self.write(header + obj)
    # save_picklebuffer() calls this method directly, bypassing any memo
    # lookup, so skip objects that are already recorded.
    # NOTE(review): assumes self.memo is keyed by id(obj), as memoize()
    # does -- confirm against the rest of the class.
    if id(obj) not in self.memo:
        self.memoize(obj)
dispatch[bytearray] = save_bytearray

def save_picklebuffer(self, obj):
    """Pickle the PickleBuffer *obj*, in-band or out-of-band.

    Raises PicklingError when the protocol is below 5 or when the
    underlying buffer is not contiguous.

    When ``self._buffer_callback`` is None the data is written in-band.
    Otherwise the callback is called with *obj*: a true result means
    in-band, a false result means out-of-band.

    In-band data is copied with ``tobytes()`` and saved as bytes when the
    buffer is read-only, or through ``save_bytearray`` when it is writable.
    Out-of-band buffers only emit a NEXT_BUFFER opcode, followed by
    READONLY_BUFFER when the buffer is read-only.
    """
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)

dispatch[PickleBuffer] = save_picklebuffer

def save_str(self, obj):
if self.bin:
encoded = obj.encode('utf-8', 'surrogatepass')
Expand Down Expand Up @@ -1042,7 +1124,7 @@ def save_type(self, obj):
class _Unpickler:

def __init__(self, file, *, fix_imports=True,
encoding="ASCII", errors="strict"):
encoding="ASCII", errors="strict", buffers=None):
"""This takes a binary file for reading a pickle data stream.

The protocol version of the pickle is detected automatically, so
Expand All @@ -1061,7 +1143,17 @@ def __init__(self, file, *, fix_imports=True,
reading, a BytesIO object, or any other custom object that
meets this interface.

Optional keyword arguments are *fix_imports*, *encoding* and
If *buffers* is not None, it should be an iterable of buffer-enabled
objects that is consumed each time the pickle stream references
an out-of-band buffer view. Such buffers have been given in order
to the *buffer_callback* of a Pickler object.

If *buffers* is None (the default), then the buffers are taken
from the pickle stream, assuming they are serialized there.
It is an error for *buffers* to be None if the pickle stream
was produced with a non-None *buffer_callback*.

Other optional arguments are *fix_imports*, *encoding* and
*errors*, which are used to control compatibility support for
pickle stream generated by Python 2. If *fix_imports* is True,
pickle will try to map the old Python 2 names to the new names
Expand All @@ -1070,6 +1162,7 @@ def __init__(self, file, *, fix_imports=True,
default to 'ASCII' and 'strict', respectively. *encoding* can be
'bytes' to read these 8-bit string instances as bytes objects.
"""
self._buffers = iter(buffers) if buffers is not None else None
self._file_readline = file.readline
self._file_read = file.read
self.memo = {}
Expand All @@ -1090,6 +1183,7 @@ def load(self):
"%s.__init__()" % (self.__class__.__name__,))
self._unframer = _Unframer(self._file_read, self._file_readline)
self.read = self._unframer.read
self.readinto = self._unframer.readinto
self.readline = self._unframer.readline
self.metastack = []
self.stack = []
Expand Down Expand Up @@ -1276,6 +1370,34 @@ def load_binbytes8(self):
self.append(self.read(len))
dispatch[BINBYTES8[0]] = load_binbytes8

def load_bytearray8(self):
    """Handle the BYTEARRAY8 opcode: push a bytearray read from the stream.

    The payload length is read as 8 bytes and unpacked with ``"<Q"``.
    A length above ``sys.maxsize`` raises UnpicklingError.  Otherwise a
    bytearray of that length is allocated, filled through
    ``self.readinto`` and appended to the stack.
    """
    # Named ``size`` so that the builtin ``len`` is not shadowed.
    size, = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    b = bytearray(size)
    self.readinto(b)
    self.append(b)
dispatch[BYTEARRAY8[0]] = load_bytearray8

def load_next_buffer(self):
    """Handle the NEXT_BUFFER opcode: push the next out-of-band buffer.

    The buffer is taken from the ``self._buffers`` iterator and appended
    to the stack.  Raises UnpicklingError when ``self._buffers`` is None
    or when the iterator is exhausted.
    """
    if self._buffers is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        buf = next(self._buffers)
    except StopIteration:
        # The StopIteration is an implementation detail; suppress it so the
        # traceback only shows the unpickling error.
        raise UnpicklingError("not enough out-of-band buffers") from None
    self.append(buf)
dispatch[NEXT_BUFFER[0]] = load_next_buffer

def load_readonly_buffer(self):
    """Handle the READONLY_BUFFER opcode: make the top of the stack read-only.

    If the object on top of the stack exposes a writable buffer, it is
    replaced by a read-only memoryview of it.  An object whose buffer is
    already read-only is left in place.
    """
    top = self.stack[-1]
    with memoryview(top) as view:
        if view.readonly:
            return
        self.stack[-1] = view.toreadonly()
dispatch[READONLY_BUFFER[0]] = load_readonly_buffer

def load_short_binstring(self):
len = self.read(1)[0]
data = self.read(len)
Expand Down Expand Up @@ -1600,25 +1722,29 @@ def load_stop(self):

# Shorthands

def _dump(obj, file, protocol=None, *, fix_imports=True):
_Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
_Pickler(file, protocol, fix_imports=fix_imports,
buffer_callback=buffer_callback).dump(obj)

def _dumps(obj, protocol=None, *, fix_imports=True):
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
f = io.BytesIO()
_Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
_Pickler(f, protocol, fix_imports=fix_imports,
buffer_callback=buffer_callback).dump(obj)
res = f.getvalue()
assert isinstance(res, bytes_types)
return res

def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
return _Unpickler(file, fix_imports=fix_imports,
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
buffers=None):
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
encoding=encoding, errors=errors).load()

def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
buffers=None):
if isinstance(s, str):
raise TypeError("Can't load pickle from unicode string")
file = io.BytesIO(s)
return _Unpickler(file, fix_imports=fix_imports,
return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
encoding=encoding, errors=errors).load()

# Use the faster _pickle if possible
Expand Down
Loading