From ca5e10e2dc56e2ba45a4a1ebab003438d458dafd Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 26 May 2023 21:27:23 +0100 Subject: [PATCH 1/3] GH-104996: Defer joining of `pathlib.PurePath()` arguments. Joining of arguments is moved to `_load_parts`, which is called when a normalized path is needed. --- Lib/pathlib.py | 33 +++++++++++-------- ...-05-26-21-24-06.gh-issue-104996.aaW78g.rst | 2 ++ 2 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8cb5279d735a30..59907b56e27d1b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -244,9 +244,9 @@ class PurePath(os.PathLike): """ __slots__ = ( - # The `_raw_path` slot stores an unnormalized string path. This is set + # The `_raw_paths` slot stores unnormalized string paths. This is set # in the `__init__()` method. - '_raw_path', + '_raw_paths', # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, @@ -299,10 +299,11 @@ def __init__(self, *args): paths = [] for arg in args: if isinstance(arg, PurePath): - path = arg._raw_path if arg._flavour is ntpath and self._flavour is posixpath: # GH-103631: Convert separators for backwards compatibility. - path = path.replace('\\', '/') + paths.extend(path.replace('\\', '/') for path in arg._raw_paths) + else: + paths.extend(arg._raw_paths) else: try: path = os.fspath(arg) @@ -313,13 +314,8 @@ def __init__(self, *args): "argument should be a str or an os.PathLike " "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") - paths.append(path) - if len(paths) == 0: - self._raw_path = '' - elif len(paths) == 1: - self._raw_path = paths[0] - else: - self._raw_path = self._flavour.join(*paths) + paths.append(path) + self._raw_paths = paths def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. @@ -349,7 +345,14 @@ def _parse_path(cls, path): return drv, root, parsed def _load_parts(self): - drv, root, tail = self._parse_path(self._raw_path) + paths = self._raw_paths + if len(paths) == 0: + path = '' + elif len(paths) == 1: + path = paths[0] + else: + path = self._flavour.join(*paths) + drv, root, tail = self._parse_path(path) self._drv = drv self._root = root self._tail_cached = tail @@ -673,7 +676,11 @@ def is_absolute(self): # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: return bool(self.drive and self.root) - return self._flavour.isabs(self._raw_path) + else: + for path in self._raw_paths: + if self._flavour.isabs(path): + return True + return False def is_reserved(self): """Return True if the path contains one of the special names reserved diff --git a/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst new file mode 100644 index 00000000000000..8b81b681af94aa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst @@ -0,0 +1,2 @@ +Improve performance of :class:`pathlib.PurePath` initialisation by +deferring joining of paths when multiple arguments are given. From d528a0aecfa2f2b4b96eeb4353bbab1e7092ec9e Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 7 Jun 2023 22:00:21 +0100 Subject: [PATCH 2/3] Tighten up is_absolute() --- Lib/pathlib.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 59907b56e27d1b..87f904e02a0132 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -673,14 +673,17 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: + # ntpath.isabs() is defective - see GH-44626. return bool(self.drive and self.root) - else: + elif self._flavour is posixpath: + # Optimization: work with raw paths on POSIX. for path in self._raw_paths: - if self._flavour.isabs(path): + if path.startswith('/'): return True return False + else: + return self._flavour.isabs(str(self)) def is_reserved(self): """Return True if the path contains one of the special names reserved From 08c69bf0a123dadfb455c32216ad6ec39a8c268c Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 7 Jun 2023 22:40:07 +0100 Subject: [PATCH 3/3] De-duplicate on string path. --- Lib/pathlib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 98eab482ae2c67..d8c597f1027f30 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -195,9 +195,10 @@ def _select_unique(paths): yielded = set() try: for path in paths: - if path not in yielded: + path_str = str(path) + if path_str not in yielded: yield path - yielded.add(path) + yielded.add(path_str) finally: yielded.clear()