From 9779342edecb9a198dcb363d63280fddbe4e8fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Tue, 13 May 2025 15:06:58 +0200 Subject: [PATCH 1/5] DOC Add note about Memory pickle security consideration (#1722) Co-authored-by: Olivier Grisel --- doc/memory.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/memory.rst b/doc/memory.rst index bda47677a..429ee4d87 100644 --- a/doc/memory.rst +++ b/doc/memory.rst @@ -60,6 +60,15 @@ A simple example: Running f(2) 2 +.. warning:: + + :class:`~joblib.Memory` uses pickle to load saved results from the file system. + This means that pickle security considerations apply, see `pickle documentation + <https://docs.python.org/3/library/pickle.html>`_ for more details. + In particular, if an adversary gains write access to the cache directory, + it could lead to arbitrary code execution by the Python process(es) attempting to + load compromised cached entries from that folder. + Comparison with `memoize` ~~~~~~~~~~~~~~~~~~~~~~~~~ From 4529751aa5443ed0b5b3b2d278b835f49466c77b Mon Sep 17 00:00:00 2001 From: Thomas Moreau Date: Fri, 23 May 2025 12:08:32 +0200 Subject: [PATCH 2/5] MTN vendor loky 3.5.5 (#1724) --- CHANGES.rst | 3 +++ joblib/externals/loky/__init__.py | 2 +- joblib/externals/loky/backend/popen_loky_posix.py | 2 +- joblib/externals/loky/backend/resource_tracker.py | 2 +- joblib/externals/loky/backend/spawn.py | 2 +- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3beffb2d0..dd680a5f2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,9 @@ In development - Fix backend hints causing errors when no multiprocessing is present https://github.com/joblib/joblib/issues/1721 +- Vendor ``loky 3.5.5`` fixing the resource_tracker clean up with earlier Python + versions. 
https://github.com/joblib/joblib/issues/1724 + Release 1.5.0 -- 2025/05/03 --------------------------- diff --git a/joblib/externals/loky/__init__.py b/joblib/externals/loky/__init__.py index cf08e3422..af28a2406 100644 --- a/joblib/externals/loky/__init__.py +++ b/joblib/externals/loky/__init__.py @@ -42,4 +42,4 @@ ] -__version__ = "3.5.3" +__version__ = "3.5.5" diff --git a/joblib/externals/loky/backend/popen_loky_posix.py b/joblib/externals/loky/backend/popen_loky_posix.py index 74fc3f264..58753036c 100644 --- a/joblib/externals/loky/backend/popen_loky_posix.py +++ b/joblib/externals/loky/backend/popen_loky_posix.py @@ -119,7 +119,7 @@ def _launch(self, process_obj): reduction._mk_inheritable(child_w) reduction._mk_inheritable(tracker_fd) self._fds += [child_r, child_w, tracker_fd] - if sys.version_info >= (3, 8) and os.name == "posix": + if os.name == "posix": mp_tracker_fd = prep_data["mp_tracker_fd"] self.duplicate_for_child(mp_tracker_fd) diff --git a/joblib/externals/loky/backend/resource_tracker.py b/joblib/externals/loky/backend/resource_tracker.py index c0176daf8..ff1aea1de 100644 --- a/joblib/externals/loky/backend/resource_tracker.py +++ b/joblib/externals/loky/backend/resource_tracker.py @@ -175,7 +175,7 @@ def __del__(self): # ignore error due to trying to clean up child process which has already been # shutdown on windows See https://github.com/joblib/loky/pull/450 # This is only required if __del__ is defined - if not hasattr(ResourceTracker, "__del__"): + if not hasattr(_ResourceTracker, "__del__"): return try: super().__del__() diff --git a/joblib/externals/loky/backend/spawn.py b/joblib/externals/loky/backend/spawn.py index f1963e94b..9a6ef9d97 100644 --- a/joblib/externals/loky/backend/spawn.py +++ b/joblib/externals/loky/backend/spawn.py @@ -87,7 +87,7 @@ def get_preparation_data(name, init_main_module=True): else: d["tracker_fd"] = _resource_tracker._fd - if sys.version_info >= (3, 8) and os.name == "posix": + if os.name == "posix": # 
joblib/loky#242: allow loky processes to retrieve the resource # tracker of their parent in case the child processes depickles # shared_memory objects, that are still tracked by multiprocessing's From cb554a51412e385f911b18697bfe9bb407c14bcc Mon Sep 17 00:00:00 2001 From: Thomas Moreau Date: Fri, 23 May 2025 14:03:05 +0200 Subject: [PATCH 3/5] RELEASE 1.5.1 (#1725) --- CHANGES.rst | 4 ++-- joblib/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dd680a5f2..f5de7568d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,8 @@ Latest changes ============== -In development --------------- +Release 1.5.1 - 2025/05/23 +-------------------------- - Fix backend hints causing errors when no multiprocessing is present https://github.com/joblib/joblib/issues/1721 diff --git a/joblib/__init__.py b/joblib/__init__.py index 4189abe33..77a05caeb 100644 --- a/joblib/__init__.py +++ b/joblib/__init__.py @@ -106,7 +106,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "1.6.dev0" +__version__ = "1.5.1" import os From 77a3255f61d91ed3521be086dc7b035b424c15c1 Mon Sep 17 00:00:00 2001 From: tommoral Date: Fri, 23 May 2025 14:10:36 +0200 Subject: [PATCH 4/5] MNT back to dev mode --- CHANGES.rst | 3 +++ joblib/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f5de7568d..ec1c599f0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,9 @@ Latest changes ============== +In development +-------------- + Release 1.5.1 - 2025/05/23 -------------------------- diff --git a/joblib/__init__.py b/joblib/__init__.py index 77a05caeb..4189abe33 100644 --- a/joblib/__init__.py +++ b/joblib/__init__.py @@ -106,7 +106,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 
# 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "1.5.1" +__version__ = "1.6.dev0" import os From 0672e76ad10e9f463221a58fb7bffa127287a4ba Mon Sep 17 00:00:00 2001 From: LunarLanding <4441338+LunarLanding@users.noreply.github.com> Date: Fri, 23 May 2025 18:34:17 +0200 Subject: [PATCH 5/5] ENH safer cache write in distributed systems (#1656) Co-authored-by: Thomas Moreau Co-authored-by: Olivier Grisel --- CHANGES.rst | 12 ++++++++++-- joblib/_store_backends.py | 9 ++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ec1c599f0..4804bcd03 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,14 @@ Latest changes In development -------------- +Memory: +~~~~~~~ + +- Ensure that temporary files managed by the ``Memory`` object do not collide + when the same cache directory is accessed concurrently from different nodes + on a cluster with a shared filesystem. + https://github.com/joblib/joblib/pull/1656 + Release 1.5.1 - 2025/05/23 -------------------------- @@ -32,8 +40,8 @@ Memory: if it is in cache memory. https://github.com/joblib/joblib/pull/1584 -- The Memory object now automatically creates a .gitignore file in its cache - directory, instructing git to ignore the entire folder. +- The ``Memory`` object now automatically creates a ``.gitignore`` file in its + cache directory, instructing git to ignore the entire folder. 
https://github.com/joblib/joblib/pull/1674 Parallel: diff --git a/joblib/_store_backends.py b/joblib/_store_backends.py index c822f0f93..3f1a2db87 100644 --- a/joblib/_store_backends.py +++ b/joblib/_store_backends.py @@ -10,6 +10,7 @@ import shutil import threading import time +import uuid import warnings from abc import ABCMeta, abstractmethod from pickle import PicklingError @@ -30,8 +31,14 @@ class CacheWarning(Warning): def concurrency_safe_write(object_to_write, filename, write_func): """Writes an object into a unique file in a concurrency-safe way.""" + # Temporary name is composed of UUID, process_id and thread_id to avoid + # collisions due to concurrent writes. + # UUID is unique across nodes and time and helps avoid collisions, even if + # the cache folder is shared by several Python processes with the same pid and + # thread id on different nodes of a cluster for instance. thread_id = id(threading.current_thread()) - temporary_filename = "{}.thread-{}-pid-{}".format(filename, thread_id, os.getpid()) + temporary_filename = f"{filename}.{uuid.uuid4().hex}-{os.getpid()}-{thread_id}" + write_func(object_to_write, temporary_filename) return temporary_filename