diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst index a50fc6f0bf77b2..388c2fee466c99 100644 --- a/Doc/library/shelve.rst +++ b/Doc/library/shelve.rst @@ -17,7 +17,7 @@ This includes most class instances, recursive data types, and objects containing lots of shared sub-objects. The keys are ordinary strings. -.. function:: open(filename, flag='c', protocol=None, writeback=False) +.. function:: open(filename, flag='c', protocol=None, writeback=False, *, serializer=None, deserializer=None) Open a persistent dictionary. The filename specified is the base filename for the underlying database. As a side-effect, an extension may be added to the @@ -41,6 +41,14 @@ lots of shared sub-objects. The keys are ordinary strings. determine which accessed entries are mutable, nor which ones were actually mutated). + By default, :mod:`shelve` uses :func:`pickle.dumps` and :func:`pickle.loads` + for serializing and deserializing. However, *serializer* can be a function + that takes the object and the *protocol* and returns :class:`bytes`, and + *deserializer* can be a function that takes a :term:`bytes-like object` and + returns the object; the two must be supplied together. For example, a + :keyword:`lambda`, which :mod:`pickle` does not support, can be stored in a + shelf by supplying serializer and deserializer functions that do support it. + .. versionchanged:: 3.10 :data:`pickle.DEFAULT_PROTOCOL` is now used as the default pickle protocol. @@ -48,6 +56,9 @@ lots of shared sub-objects. The keys are ordinary strings. .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. + .. versionchanged:: 3.12 + Added the *serializer* and *deserializer* parameters. + .. note:: Do not rely on the shelf being closed automatically; always call diff --git a/Lib/shelve.py b/Lib/shelve.py index e053c397345a07..f4bac3d962201c 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -56,13 +56,18 @@ the persistent dictionary on disk, if feasible). 
""" -from pickle import DEFAULT_PROTOCOL, Pickler, Unpickler +from pickle import DEFAULT_PROTOCOL, Unpickler, dumps, loads from io import BytesIO import collections.abc -__all__ = ["Shelf", "BsdDbShelf", "DbfilenameShelf", "open"] +__all__ = ["ShelveError", "Shelf", "BsdDbShelf", "DbfilenameShelf", "open"] + +class ShelveError(Exception): + """Raised when serializer and deserializer are not supplied together.""" + + class _ClosedDict(collections.abc.MutableMapping): 'Marker for a closed dict. Access attempts raise a ValueError.' @@ -82,7 +87,7 @@ class Shelf(collections.abc.MutableMapping): """ def __init__(self, dict, protocol=None, writeback=False, - keyencoding="utf-8"): + keyencoding="utf-8", *, serializer=None, deserializer=None): self.dict = dict if protocol is None: protocol = DEFAULT_PROTOCOL @@ -91,6 +96,15 @@ def __init__(self, dict, protocol=None, writeback=False, self.cache = {} self.keyencoding = keyencoding + if serializer is None and deserializer is None: + self.serializer = dumps + self.deserializer = loads + elif (serializer is None) ^ (deserializer is None): + raise ShelveError("Serializer and deserializer must be defined together.") + else: + self.serializer = serializer + self.deserializer = deserializer + def __iter__(self): for k in self.dict.keys(): yield k.decode(self.keyencoding) @@ -110,8 +124,8 @@ def __getitem__(self, key): try: value = self.cache[key] except KeyError: - f = BytesIO(self.dict[key.encode(self.keyencoding)]) - value = Unpickler(f).load() + f = self.dict[key.encode(self.keyencoding)] + value = self.deserializer(f) if self.writeback: self.cache[key] = value return value @@ -119,10 +133,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if self.writeback: self.cache[key] = value - f = BytesIO() - p = Pickler(f, self._protocol) - p.dump(value) - self.dict[key.encode(self.keyencoding)] = f.getvalue() + self.dict[key.encode(self.keyencoding)] = self.serializer(value, self._protocol) def __delitem__(self, key): del self.dict[key.encode(self.keyencoding)] @@ -222,12 +233,12 @@ class DbfilenameShelf(Shelf): See the module's __doc__ string for an overview of 
the interface. """ - def __init__(self, filename, flag='c', protocol=None, writeback=False): + def __init__(self, filename, flag='c', protocol=None, writeback=False, *, serializer=None, deserializer=None): import dbm - Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback) + Shelf.__init__(self, dbm.open(filename, flag), protocol, writeback, serializer=serializer, deserializer=deserializer) -def open(filename, flag='c', protocol=None, writeback=False): +def open(filename, flag='c', protocol=None, writeback=False, *, serializer=None, deserializer=None): """Open a persistent dictionary for reading and writing. The filename parameter is the base filename for the underlying @@ -240,4 +251,4 @@ def open(filename, flag='c', protocol=None, writeback=False): See the module's __doc__ string for an overview of the interface. """ - return DbfilenameShelf(filename, flag, protocol, writeback) + return DbfilenameShelf(filename, flag, protocol, writeback, serializer=serializer, deserializer=deserializer) diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index 08c6562f2a273e..87477d830d377e 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -3,6 +3,8 @@ import shelve import pickle import os +from io import BytesIO +from pydoc import locate from test.support import os_helper from collections.abc import MutableMapping @@ -165,6 +167,58 @@ def test_default_protocol(self): with shelve.Shelf({}) as s: self.assertEqual(s._protocol, pickle.DEFAULT_PROTOCOL) + def test_custom_serializer_and_deserializer(self): + def serializer(obj, protocol=None): + return bytes(f"{type(obj).__name__}", 'utf-8') + + def deserializer(data): + value = BytesIO(data).read() + return locate(value.decode("utf-8")) + + os.mkdir(self.dirname) + self.addCleanup(os_helper.rmtree, self.dirname) + + with shelve.open(self.fn, serializer=serializer, deserializer=deserializer) as s: + num = 1 + s['number'] = num + self.assertEqual(s['number'], type(num)) + + with self.assertRaises(AssertionError): + 
def serializer(obj, protocol=None): + return bytes(f"{type(obj).__name__}", 'utf-8') + + def deserializer(data): + pass + + with shelve.open(self.fn, serializer=serializer, deserializer=deserializer) as s: + s['number'] = 100 + self.assertEqual(s['number'], 100) + + with self.assertRaises(TypeError): + def serializer(obj, protocol=None): + pass + + def deserializer(data): + return BytesIO(data).read().decode("utf-8") + + with shelve.open(self.fn, serializer=serializer, deserializer=deserializer) as s: + s['number'] = 100 + self.assertEqual(s['number'], 100) + + def test_missing_custom_deserializer(self): + def serializer(obj, protocol=None): + pass + + with self.assertRaises(shelve.ShelveError): + shelve.Shelf({}, protocol=2, writeback=False, serializer=serializer) + + def test_missing_custom_serializer(self): + def deserializer(data): + pass + + with self.assertRaises(shelve.ShelveError): + shelve.Shelf({}, protocol=2, writeback=False, deserializer=deserializer) + class TestShelveBase: type2test = shelve.Shelf diff --git a/Misc/NEWS.d/next/Documentation/2022-12-05-19-21-22.gh-issue-99631.WVkNJl.rst b/Misc/NEWS.d/next/Documentation/2022-12-05-19-21-22.gh-issue-99631.WVkNJl.rst new file mode 100644 index 00000000000000..3a14448a98b84c --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2022-12-05-19-21-22.gh-issue-99631.WVkNJl.rst @@ -0,0 +1 @@ +The :mod:`shelve` module now accepts custom *serializer* and *deserializer* functions.