diff --git a/CHANGES.rst b/CHANGES.rst index 3beffb2d0..4804bcd03 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,9 +4,23 @@ Latest changes In development -------------- +Memory: +~~~~~~~ + +- Ensure that temporary files managed by the ``Memory`` object do not collide + when the same cache directory is accessed + concurrently from different nodes on a cluster with a shared filesystem. + https://github.com/joblib/joblib/pull/1656 + +Release 1.5.1 -- 2025/05/23 +--------------------------- + - Fix backend hints causing errors when no multiprocessing is present https://github.com/joblib/joblib/issues/1721 +- Vendor ``loky 3.5.5`` fixing the resource_tracker clean up with earlier Python + versions. https://github.com/joblib/joblib/issues/1724 + Release 1.5.0 -- 2025/05/03 --------------------------- @@ -26,8 +40,8 @@ Memory: if it is in cache memory. https://github.com/joblib/joblib/pull/1584 -- The Memory object now automatically creates a .gitignore file in its cache - directory, instructing git to ignore the entire folder. +- The ``Memory`` object now automatically creates a ``.gitignore`` file in its + cache directory, instructing git to ignore the entire folder. https://github.com/joblib/joblib/pull/1674 Parallel: diff --git a/doc/memory.rst b/doc/memory.rst index bda47677a..429ee4d87 100644 --- a/doc/memory.rst +++ b/doc/memory.rst @@ -60,6 +60,15 @@ A simple example: Running f(2) 2 +.. warning:: + + :class:`~joblib.Memory` uses pickle to load saved results from the file system. + This means that pickle security considerations apply, see `pickle documentation + <https://docs.python.org/3/library/pickle.html>`_ for more details. + In particular, if an adversary gains write access to the cache directory, + this could lead to arbitrary code execution by the Python process(es) attempting to + load compromised cached entries from that folder. 
+ Comparison with `memoize` ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/joblib/_store_backends.py b/joblib/_store_backends.py index c822f0f93..3f1a2db87 100644 --- a/joblib/_store_backends.py +++ b/joblib/_store_backends.py @@ -10,6 +10,7 @@ import shutil import threading import time +import uuid import warnings from abc import ABCMeta, abstractmethod from pickle import PicklingError @@ -30,8 +31,14 @@ class CacheWarning(Warning): def concurrency_safe_write(object_to_write, filename, write_func): """Writes an object into a unique file in a concurrency-safe way.""" + # The temporary name is composed of a UUID, the process id and the thread id to + # avoid collisions due to concurrent writes. + # The UUID is unique across nodes and time and helps avoid collisions, even if + # the cache folder is shared by several Python processes with the same pid and + # thread id on different nodes of a cluster, for instance. thread_id = id(threading.current_thread()) - temporary_filename = "{}.thread-{}-pid-{}".format(filename, thread_id, os.getpid()) + temporary_filename = f"{filename}.{uuid.uuid4().hex}-{os.getpid()}-{thread_id}" + write_func(object_to_write, temporary_filename) return temporary_filename diff --git a/joblib/externals/loky/__init__.py b/joblib/externals/loky/__init__.py index cf08e3422..af28a2406 100644 --- a/joblib/externals/loky/__init__.py +++ b/joblib/externals/loky/__init__.py @@ -42,4 +42,4 @@ ] -__version__ = "3.5.3" +__version__ = "3.5.5" diff --git a/joblib/externals/loky/backend/popen_loky_posix.py b/joblib/externals/loky/backend/popen_loky_posix.py index 74fc3f264..58753036c 100644 --- a/joblib/externals/loky/backend/popen_loky_posix.py +++ b/joblib/externals/loky/backend/popen_loky_posix.py @@ -119,7 +119,7 @@ def _launch(self, process_obj): reduction._mk_inheritable(child_w) reduction._mk_inheritable(tracker_fd) self._fds += [child_r, child_w, tracker_fd] - if sys.version_info >= (3, 8) and os.name == "posix": + if os.name == "posix": mp_tracker_fd = 
prep_data["mp_tracker_fd"] self.duplicate_for_child(mp_tracker_fd) diff --git a/joblib/externals/loky/backend/resource_tracker.py b/joblib/externals/loky/backend/resource_tracker.py index c0176daf8..ff1aea1de 100644 --- a/joblib/externals/loky/backend/resource_tracker.py +++ b/joblib/externals/loky/backend/resource_tracker.py @@ -175,7 +175,7 @@ def __del__(self): # ignore error due to trying to clean up child process which has already been # shutdown on windows See https://github.com/joblib/loky/pull/450 # This is only required if __del__ is defined - if not hasattr(ResourceTracker, "__del__"): + if not hasattr(_ResourceTracker, "__del__"): return try: super().__del__() diff --git a/joblib/externals/loky/backend/spawn.py b/joblib/externals/loky/backend/spawn.py index f1963e94b..9a6ef9d97 100644 --- a/joblib/externals/loky/backend/spawn.py +++ b/joblib/externals/loky/backend/spawn.py @@ -87,7 +87,7 @@ def get_preparation_data(name, init_main_module=True): else: d["tracker_fd"] = _resource_tracker._fd - if sys.version_info >= (3, 8) and os.name == "posix": + if os.name == "posix": # joblib/loky#242: allow loky processes to retrieve the resource # tracker of their parent in case the child processes depickles # shared_memory objects, that are still tracked by multiprocessing's