From e47c5e1924f9af79af7804083d9167f4109803c7 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Wed, 5 Sep 2018 11:06:10 +0800 Subject: [PATCH 1/7] change unpickling to use import rather than retrieving from sys.modules --- .../Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst | 3 +++ Modules/_pickle.c | 11 +++-------- 2 files changed, 6 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst diff --git a/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst b/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst new file mode 100644 index 00000000000000..0468d96420f349 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst @@ -0,0 +1,3 @@ +Fix C implementation of pickle.loads to use importlib's locking +mechanisms, and thereby avoid using partially-loaded modules. +Patch by Tim Burgess. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 2de70f5d9405dc..411a6259aaf398 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -6623,14 +6623,9 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, } } - module = PyImport_GetModule(module_name); - if (module == NULL) { - if (PyErr_Occurred()) - return NULL; - module = PyImport_Import(module_name); - if (module == NULL) - return NULL; - } + module = PyImport_Import(module_name); + if (module == NULL) + return NULL; global = getattribute(module, global_name, self->proto >= 4); Py_DECREF(module); return global; From 595bdbb673e9c80fa844754b4b868816ef534b61 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Thu, 7 Feb 2019 09:08:15 +0800 Subject: [PATCH 2/7] pep7 fixup --- Modules/_pickle.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 411a6259aaf398..7e5f5067c494a4 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -6624,8 +6624,9 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, } module = PyImport_Import(module_name); - if (module == NULL) + if (module == NULL) { return NULL; + } global = getattribute(module, global_name, self->proto >= 4); Py_DECREF(module); return global; From f72802acfd2049285a7d2c1ce446f4991cdb8395 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Thu, 7 Feb 2019 09:38:54 +0800 Subject: [PATCH 3/7] add unit test for bpo-34572 --- Lib/test/test_threaded_pickle.py | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 Lib/test/test_threaded_pickle.py diff --git a/Lib/test/test_threaded_pickle.py b/Lib/test/test_threaded_pickle.py new file mode 100644 index 00000000000000..ebd35892bb14ab --- /dev/null +++ b/Lib/test/test_threaded_pickle.py @@ -0,0 +1,53 @@ +import sys +import importlib +import os +import shutil +import threading +import unittest +from test.support import TESTFN, forget +import pickle + +slow_import_module = """ +import time + +time.sleep(%(delay)s) + +class ToBePickled(object): + pass +""" + + +class ThreadedPickleTests(unittest.TestCase): + def test_pickle_thread_imports(self): + # https://bugs.python.org/issue34572 + delay = 0.01 + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + contents = slow_import_module % {'delay': delay} + with open(os.path.join(TESTFN, "slowimport.py"), "wb") as f: + f.write(contents.encode('utf-8')) + self.addCleanup(forget, "slowimport") + + import slowimport + obj = slowimport.ToBePickled() + pickle_bytes = pickle.dumps(obj) + del obj + + # Then try to unpickle two of these simultaneously + def do_unpickle(): + results.append(pickle.loads(pickle_bytes)) + + for repeat_test in range(10): + results = [] + forget("slowimport") + self.assertNotIn("slowimport", sys.modules) + importlib.invalidate_caches() + t1 = threading.Thread(target=do_unpickle) + t2 = threading.Thread(target=do_unpickle) + t1.start() + t2.start() + t1.join() + t2.join() + self.assertEqual(len(results), 2) From fa5dbebf4d7c0654a1503a8eb9d4fd2e6b69c111 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Fri, 8 Feb 2019 12:48:58 +0800 Subject: [PATCH 4/7] add comment to explain why GetModule wasn't used --- Modules/_pickle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 7e5f5067c494a4..2d8d920688fb43 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -6623,6 +6623,10 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, } } + /* + * we don't use PyImport_GetModule here, because it can return partially- + * initialised modules, which then cause the getattribute to fail. + */ module = PyImport_Import(module_name); if (module == NULL) { return NULL; From 64a7f954c56d5afc00ea1d8008fb78ff65ddafa1 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Fri, 8 Feb 2019 12:49:16 +0800 Subject: [PATCH 5/7] rewrite test case to avoid sleeps --- Lib/test/pickletester.py | 63 +++++++++++++++++++++++++++++++- Lib/test/test_threaded_pickle.py | 53 --------------------------- 2 files changed, 62 insertions(+), 54 deletions(-) delete mode 100644 Lib/test/test_threaded_pickle.py diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 71c2feadb613d2..dff87434d4432a 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3,18 +3,22 @@ import dbm import io import functools +import os import pickle import pickletools +import shutil import struct import sys +import threading import unittest import weakref +from textwrap import dedent from http.cookies import SimpleCookie from test import support from test.support import ( TestFailed, TESTFN, run_with_locale, no_tracing, - _2G, _4G, bigmemtest, + _2G, _4G, bigmemtest, reap_threads, forget, ) from pickle import bytes_types @@ -2387,6 +2391,63 @@ def f(): with self.assertRaises((AttributeError, pickle.PicklingError)): pickletools.dis(self.dumps(f, proto)) + @reap_threads + def test_unpickle_module_race(self): + # https://bugs.python.org/issue34572 + locker_module = dedent(""" + import threading + barrier = threading.Barrier(2) + """) + locking_import_module = dedent(""" + import locker + locker.barrier.wait() + class ToBeUnpickled(object): + pass + """) + + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + with open(os.path.join(TESTFN, "locker.py"), "wb") as f: + f.write(locker_module.encode('utf-8')) + with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f: + f.write(locking_import_module.encode('utf-8')) + self.addCleanup(forget, "locker") + self.addCleanup(forget, "locking_import") + + import locker + + pickle_bytes = ( + b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.') + + # Then try to unpickle two of these simultaneously + # One of them will cause the module import, and we want it to block + # until the other one either: + # - fails (before the patch for this issue) + # - blocks on the import lock for the module, as it should + results = [] + barrier = threading.Barrier(3) + def t(): + # This ensures the threads have all started + # presumably barrier release is faster than thread startup + barrier.wait() + results.append(pickle.loads(pickle_bytes)) + + t1 = threading.Thread(target=t) + t2 = threading.Thread(target=t) + t1.start() + t2.start() + + barrier.wait() + # could have delay here + locker.barrier.wait() + + t1.join() + t2.join() + + self.assertEqual(len(results), 2) + class BigmemPickleTests(unittest.TestCase): diff --git a/Lib/test/test_threaded_pickle.py b/Lib/test/test_threaded_pickle.py deleted file mode 100644 index ebd35892bb14ab..00000000000000 --- a/Lib/test/test_threaded_pickle.py +++ /dev/null @@ -1,53 +0,0 @@ -import sys -import importlib -import os -import shutil -import threading -import unittest -from test.support import TESTFN, forget -import pickle - -slow_import_module = """ -import time - -time.sleep(%(delay)s) - -class ToBePickled(object): - pass -""" - - -class ThreadedPickleTests(unittest.TestCase): - def test_pickle_thread_imports(self): - # https://bugs.python.org/issue34572 - delay = 0.01 - os.mkdir(TESTFN) - self.addCleanup(shutil.rmtree, TESTFN) - sys.path.insert(0, TESTFN) - self.addCleanup(sys.path.remove, TESTFN) - contents = slow_import_module % {'delay': delay} - with open(os.path.join(TESTFN, "slowimport.py"), "wb") as f: - f.write(contents.encode('utf-8')) - self.addCleanup(forget, "slowimport") - - import slowimport - obj = slowimport.ToBePickled() - pickle_bytes = pickle.dumps(obj) - del obj - - # Then try to unpickle two of these simultaneously - def do_unpickle(): - results.append(pickle.loads(pickle_bytes)) - - for repeat_test in range(10): - results = [] - forget("slowimport") - self.assertNotIn("slowimport", sys.modules) - importlib.invalidate_caches() - t1 = threading.Thread(target=do_unpickle) - t2 = threading.Thread(target=do_unpickle) - t1.start() - t2.start() - t1.join() - t2.join() - self.assertEqual(len(results), 2) From 202d4e2a11f857801356ba181b6e6179a6068016 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Fri, 8 Feb 2019 15:13:27 +0800 Subject: [PATCH 6/7] move test to AbstractUnpickleTester --- Lib/test/pickletester.py | 114 +++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index dff87434d4432a..dc7980febf58d9 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1178,6 +1178,63 @@ def test_truncated_data(self): for p in badpickles: self.check_unpickling_error(self.truncated_errors, p) + @reap_threads + def test_unpickle_module_race(self): + # https://bugs.python.org/issue34572 + locker_module = dedent(""" + import threading + barrier = threading.Barrier(2) + """) + locking_import_module = dedent(""" + import locker + locker.barrier.wait() + class ToBeUnpickled(object): + pass + """) + + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + with open(os.path.join(TESTFN, "locker.py"), "wb") as f: + f.write(locker_module.encode('utf-8')) + with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f: + f.write(locking_import_module.encode('utf-8')) + self.addCleanup(forget, "locker") + self.addCleanup(forget, "locking_import") + + import locker + + pickle_bytes = ( + b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.') + + # Then try to unpickle two of these simultaneously + # One of them will cause the module import, and we want it to block + # until the other one either: + # - fails (before the patch for this issue) + # - blocks on the import lock for the module, as it should + results = [] + barrier = threading.Barrier(3) + def t(): + # This ensures the threads have all started + # presumably barrier release is faster than thread startup + barrier.wait() + results.append(pickle.loads(pickle_bytes)) + + t1 = threading.Thread(target=t) + t2 = threading.Thread(target=t) + t1.start() + t2.start() + + barrier.wait() + # could have delay here + locker.barrier.wait() + + t1.join() + t2.join() + + self.assertEqual(len(results), 2) + class AbstractPickleTests(unittest.TestCase): # Subclass must define self.dumps, self.loads. @@ -2391,63 +2448,6 @@ def f(): with self.assertRaises((AttributeError, pickle.PicklingError)): pickletools.dis(self.dumps(f, proto)) - @reap_threads - def test_unpickle_module_race(self): - # https://bugs.python.org/issue34572 - locker_module = dedent(""" - import threading - barrier = threading.Barrier(2) - """) - locking_import_module = dedent(""" - import locker - locker.barrier.wait() - class ToBeUnpickled(object): - pass - """) - - os.mkdir(TESTFN) - self.addCleanup(shutil.rmtree, TESTFN) - sys.path.insert(0, TESTFN) - self.addCleanup(sys.path.remove, TESTFN) - with open(os.path.join(TESTFN, "locker.py"), "wb") as f: - f.write(locker_module.encode('utf-8')) - with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f: - f.write(locking_import_module.encode('utf-8')) - self.addCleanup(forget, "locker") - self.addCleanup(forget, "locking_import") - - import locker - - pickle_bytes = ( - b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.') - - # Then try to unpickle two of these simultaneously - # One of them will cause the module import, and we want it to block - # until the other one either: - # - fails (before the patch for this issue) - # - blocks on the import lock for the module, as it should - results = [] - barrier = threading.Barrier(3) - def t(): - # This ensures the threads have all started - # presumably barrier release is faster than thread startup - barrier.wait() - results.append(pickle.loads(pickle_bytes)) - - t1 = threading.Thread(target=t) - t2 = threading.Thread(target=t) - t1.start() - t2.start() - - barrier.wait() - # could have delay here - locker.barrier.wait() - - t1.join() - t2.join() - - self.assertEqual(len(results), 2) - class BigmemPickleTests(unittest.TestCase): From a0425ef01015ebf7208f9e062330825ba3587f30 Mon Sep 17 00:00:00 2001 From: Tim Burgess Date: Wed, 13 Feb 2019 08:32:50 +0800 Subject: [PATCH 7/7] better assertion --- Lib/test/pickletester.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index dc7980febf58d9..293393fe37135a 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1233,7 +1233,11 @@ def t(): t1.join() t2.join() - self.assertEqual(len(results), 2) + from locking_import import ToBeUnpickled + self.assertEqual( + [type(x) for x in results], + [ToBeUnpickled] * 2) + class AbstractPickleTests(unittest.TestCase):