Skip to content

gh-99631: Add custom loads and dumps support for the shelve module #99632

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion Doc/library/shelve.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ This includes most class instances, recursive data types, and objects containing
lots of shared sub-objects. The keys are ordinary strings.


.. function:: open(filename, flag='c', protocol=None, writeback=False)
.. function:: open(filename, flag='c', protocol=None, writeback=False, *, serializer=None, deserializer=None)

Open a persistent dictionary. The filename specified is the base filename for
the underlying database. As a side-effect, an extension may be added to the
Expand All @@ -41,13 +41,24 @@ lots of shared sub-objects. The keys are ordinary strings.
determine which accessed entries are mutable, nor which ones were actually
mutated).

By default, :mod:`shelve` uses :func:`pickle.dumps` and :func:`pickle.loads`
for serializing and deserializing. However, *serializer* can be a function
that takes the object and the pickle *protocol* and returns :class:`bytes`, and
*deserializer* can be a function that takes a :term:`bytes-like object` and
returns the object. The two must be given together; supplying only one of them
raises :exc:`ShelveError`. For example, a :keyword:`lambda`, which
:mod:`pickle` does not support, can be stored in a :mod:`shelve` by supplying
serializer and deserializer functions that do support :keyword:`lambda`.

.. versionchanged:: 3.10
:data:`pickle.DEFAULT_PROTOCOL` is now used as the default pickle
protocol.

.. versionchanged:: 3.11
Accepts :term:`path-like object` for filename.

.. versionchanged:: 3.12
Accepts *serializer* and *deserializer*.

.. note::

Do not rely on the shelf being closed automatically; always call
Expand Down
35 changes: 23 additions & 12 deletions Lib/shelve.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,18 @@
the persistent dictionary on disk, if feasible).
"""

from pickle import DEFAULT_PROTOCOL, Pickler, Unpickler
from pickle import DEFAULT_PROTOCOL, Unpickler, dumps, loads
from io import BytesIO

import collections.abc

# Public names of the module.  ShelveError is listed so that callers can
# import and catch the exception raised for a mismatched serializer pair.
__all__ = ["ShelveError", "Shelf", "BsdDbShelf", "DbfilenameShelf", "open"]


class ShelveError(Exception):
    """Raised when only one of *serializer* and *deserializer* is given."""


class _ClosedDict(collections.abc.MutableMapping):
'Marker for a closed dict. Access attempts raise a ValueError.'

Expand All @@ -82,7 +87,7 @@ class Shelf(collections.abc.MutableMapping):
"""

def __init__(self, dict, protocol=None, writeback=False,
keyencoding="utf-8"):
keyencoding="utf-8", *, serializer=None, deserializer=None):
self.dict = dict
if protocol is None:
protocol = DEFAULT_PROTOCOL
Expand All @@ -91,6 +96,15 @@ def __init__(self, dict, protocol=None, writeback=False,
self.cache = {}
self.keyencoding = keyencoding

if serializer is None and deserializer is None:
self.serializer = dumps
self.deserializer = loads
elif (serializer is None) ^ (deserializer is None):
raise ShelveError("Serializer and deserializer must be defined together.")
else:
self.serializer = serializer
self.deserializer = deserializer

def __iter__(self):
for k in self.dict.keys():
yield k.decode(self.keyencoding)
Expand All @@ -110,19 +124,16 @@ def __getitem__(self, key):
try:
value = self.cache[key]
except KeyError:
f = BytesIO(self.dict[key.encode(self.keyencoding)])
value = Unpickler(f).load()
f = self.dict[key.encode(self.keyencoding)]
value = self.deserializer(f)
if self.writeback:
self.cache[key] = value
return value

def __setitem__(self, key, value):
if self.writeback:
self.cache[key] = value
f = BytesIO()
p = Pickler(f, self._protocol)
p.dump(value)
self.dict[key.encode(self.keyencoding)] = f.getvalue()
self.dict[key.encode(self.keyencoding)] = self.serializer(value, self._protocol)

def __delitem__(self, key):
del self.dict[key.encode(self.keyencoding)]
Expand Down Expand Up @@ -222,12 +233,12 @@ class DbfilenameShelf(Shelf):
See the module's __doc__ string for an overview of the interface.
"""

def __init__(self, filename, flag='c', protocol=None, writeback=False, *,
             serializer=None, deserializer=None):
    """Open the dbm database *filename* and initialize the shelf on it.

    *filename* and *flag* are handed to dbm.open(); *protocol* and
    *writeback*, plus the keyword-only *serializer* and *deserializer*,
    are forwarded to Shelf.__init__().
    """
    import dbm
    Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback,
                   serializer=serializer, deserializer=deserializer)


def open(filename, flag='c', protocol=None, writeback=False, *,
         serializer=None, deserializer=None):
    """Open a persistent dictionary for reading and writing.

    The filename parameter is the base filename for the underlying
    database.  The flag, protocol and writeback arguments, together
    with the keyword-only serializer and deserializer callables, are
    passed through unchanged to DbfilenameShelf.

    See the module's __doc__ string for an overview of the interface.
    """

    # serializer and deserializer are keyword-only on DbfilenameShelf, so
    # they must be forwarded by name; passing them positionally raises
    # TypeError.
    return DbfilenameShelf(filename, flag, protocol, writeback,
                           serializer=serializer, deserializer=deserializer)
54 changes: 54 additions & 0 deletions Lib/test/test_shelve.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import shelve
import pickle
import os
from io import BytesIO
from pydoc import locate

from test.support import os_helper
from collections.abc import MutableMapping
Expand Down Expand Up @@ -165,6 +167,58 @@ def test_default_protocol(self):
with shelve.Shelf({}) as s:
self.assertEqual(s._protocol, pickle.DEFAULT_PROTOCOL)

def test_custom_serializer_and_deserializer(self):
    # Exercise shelve.open() with user-supplied serializer/deserializer
    # callables: a working pair, a deserializer that discards the data,
    # and a serializer whose result cannot be stored.
    def type_name_serializer(obj, protocol=None):
        # Persist only the name of the value's type.
        return bytes(f"{type(obj).__name__}", 'utf-8')

    def type_locating_deserializer(data):
        # Resolve the stored type name back to the type object.
        return locate(data.decode("utf-8"))

    def none_serializer(obj, protocol=None):
        return None

    def none_deserializer(data):
        return None

    os.mkdir(self.dirname)
    self.addCleanup(os_helper.rmtree, self.dirname)

    with shelve.open(self.fn, serializer=type_name_serializer,
                     deserializer=type_locating_deserializer) as s:
        num = 1
        s['number'] = num
        self.assertEqual(s['number'], type(num))

    # The shelf hands back whatever the deserializer returns; assert that
    # directly instead of expecting an equality assertion to fail.
    with shelve.open(self.fn, serializer=type_name_serializer,
                     deserializer=none_deserializer) as s:
        s['number'] = 100
        self.assertIsNone(s['number'])

    # Only the store may raise: keeping shelve.open() outside the
    # assertRaises block stops an unrelated TypeError raised while
    # opening the shelf from satisfying the check.  The dbm backend is
    # expected to reject the None produced by the serializer.
    with shelve.open(self.fn, serializer=none_serializer,
                     deserializer=none_deserializer) as s:
        with self.assertRaises(TypeError):
            s['number'] = 100

def test_missing_custom_deserializer(self):
    # A serializer given without a matching deserializer must be rejected
    # when the Shelf is constructed.
    self.assertRaises(
        shelve.ShelveError,
        shelve.Shelf,
        {},
        protocol=2,
        writeback=False,
        serializer=lambda obj, protocol=None: None,
    )

def test_missing_custom_serializer(self):
    # A deserializer given without a matching serializer must be rejected
    # when the Shelf is constructed.
    self.assertRaises(
        shelve.ShelveError,
        shelve.Shelf,
        {},
        protocol=2,
        writeback=False,
        deserializer=lambda data: None,
    )


class TestShelveBase:
type2test = shelve.Shelf
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The :mod:`shelve` module now accepts custom *serializer* and *deserializer* functions.