From b01c352f06a7bb1291e4213079e821aa89fa8daa Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 7 Feb 2023 22:13:07 +0000 Subject: [PATCH 1/7] GH-101362: Optimise PurePath(PurePath(...)) The previous `_parse_args()` method pulled the `_parts` out of any supplied `PurePath` objects; these were subsequently joined in `_from_parts()` using `os.path.join()`. This is actually a slower form of joining than calling `fspath()` on the path object, because it doesn't take advantage of the fact that the contents of `_parts` is normalized! This reduces the time taken to run `PurePath("foo", "bar") by ~20%, and the time taken to run `PurePath(p, "cheese")`, where `p = PurePath("/foo", "bar", "baz")`, by ~40%. --- Lib/pathlib.py | 36 ++++++------------- ...-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst | 3 ++ 2 files changed, 14 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 17659bcd3e2d7f..91dd6bc5b60cb0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -278,6 +278,14 @@ def _parse_parts(cls, parts): sep = cls._flavour.sep altsep = cls._flavour.altsep path = cls._flavour.join(*parts) + if isinstance(path, str): + # Force-cast str subclasses to str (issue #21127) + path = str(path) + else: + raise TypeError( + "argument should be a str object or an os.PathLike " + "object returning str, not %r" + % type(path)) if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) @@ -288,32 +296,10 @@ def _parse_parts(cls, parts): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - @classmethod - def _parse_args(cls, args): - # This is useful when you don't want to create an instance, just - # canonicalize some constructor arguments. - parts = [] - for a in args: - if isinstance(a, PurePath): - parts += a._parts - else: - a = os.fspath(a) - if isinstance(a, str): - # Force-cast str subclasses to str (issue #21127) - parts.append(str(a)) - else: - raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(a)) - return cls._parse_parts(parts) - @classmethod def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. self = object.__new__(cls) - drv, root, parts = self._parse_args(args) + drv, root, parts = self._parse_parts(args) self._drv = drv self._root = root self._parts = parts @@ -572,7 +558,7 @@ def joinpath(self, *args): anchored). """ drv1, root1, parts1 = self._drv, self._root, self._parts - drv2, root2, parts2 = self._parse_args(args) + drv2, root2, parts2 = self._parse_parts(args) if root2: if not drv2 and drv1: return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:]) @@ -664,7 +650,7 @@ def match(self, path_pattern): return True # Can't subclass os.PathLike from PurePath and keep the constructor -# optimizations in PurePath._parse_args(). +# optimizations in PurePath.__slots__. os.PathLike.register(PurePath) diff --git a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst new file mode 100644 index 00000000000000..8bbaf868017872 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst @@ -0,0 +1,3 @@ +Speed up :class:`pathlib.PurePath` construction by handling arguments more +uniformly. When a path argument is supplied, we use its string +representation rather than joining its parts with :func:`os.path.join`. From 6db6d558e6f4534f9901b26df9c92ac34ff4aa55 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 25 Feb 2023 01:03:07 +0000 Subject: [PATCH 2/7] Tweak PurePath constructor docs to hint that we call `os.fspath()` --- Doc/library/pathlib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index f222745a2c56bc..a6cbeb38569db9 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -105,8 +105,8 @@ we also call *flavours*: PurePosixPath('setup.py') Each element of *pathsegments* can be either a string representing a - path segment, an object implementing the :class:`os.PathLike` interface - which returns a string, or another path object:: + path segment, or an object implementing the :class:`os.PathLike` interface + which returns a string, such as another path object:: >>> PurePath('foo', 'some/path', 'bar') PurePosixPath('foo/some/path/bar') From 1b9e2d377c5e57828b6dad601a1de86d30710324 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 28 Feb 2023 12:18:45 +0000 Subject: [PATCH 3/7] Add tests for bytes paths. --- Lib/test/test_pathlib.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index a596795b44f0fa..eefc09d75890e8 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -166,6 +166,31 @@ def test_constructor_common(self): self.assertEqual(P(P('a'), P('b')), P('a/b')) self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) + def test_bytes(self): + P = self.cls + with self.assertRaises(TypeError): + P(b'a') + with self.assertRaises(TypeError): + P(b'a', 'b') + with self.assertRaises(TypeError): + P('a', b'b') + with self.assertRaises(TypeError): + P('a').joinpath(b'b') + with self.assertRaises(TypeError): + P('a') / b'b' + with self.assertRaises(TypeError): + b'a' / P('b') + with self.assertRaises(TypeError): + P('a').match(b'b') + with self.assertRaises(TypeError): + P('a').relative_to(b'b') + with self.assertRaises(TypeError): + P('a').with_name(b'b') + with self.assertRaises(TypeError): + P('a').with_stem(b'b') + with self.assertRaises(TypeError): + P('a').with_suffix(b'b') + def _check_str_subclass(self, *args): # Issue #21127: it should be possible to construct a PurePath object # from a str subclass instance, and it then gets converted to From 37be0837b8be06eeacd34822bb663ae7be81d7f7 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 5 Mar 2023 22:47:53 +0000 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Alex Waygood --- Doc/library/pathlib.rst | 3 ++- Lib/pathlib.py | 3 +-- .../Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 26934bf1b69087..8e91936680fab8 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -106,7 +106,8 @@ we also call *flavours*: Each element of *pathsegments* can be either a string representing a path segment, or an object implementing the :class:`os.PathLike` interface - which returns a string, such as another path object:: + where the :meth:`~os.PathLike.__fspath__` method returns a string, + such as another path object:: >>> PurePath('foo', 'some/path', 'bar') PurePosixPath('foo/some/path/bar') diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e3353b258d06e9..a3c5640815bc3c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -287,8 +287,7 @@ def _parse_parts(cls, parts): else: raise TypeError( "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(path)) + f"object where __fspath__ returns a str, not {type(path)!r}") if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) diff --git a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst index 8bbaf868017872..bc9efee028c6e0 100644 --- a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst +++ b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst @@ -1,3 +1,4 @@ Speed up :class:`pathlib.PurePath` construction by handling arguments more -uniformly. When a path argument is supplied, we use its string -representation rather than joining its parts with :func:`os.path.join`. +uniformly. When a :class:`pathlib.Path argument is supplied, +we use its string representation rather than joining its parts +with :func:`os.path.join`. From 782a940925736850dd2704480c49f062eb145d2e Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 5 Mar 2023 23:05:06 +0000 Subject: [PATCH 5/7] Update Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst Co-authored-by: Alex Waygood --- .../next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst index bc9efee028c6e0..c05f92ae699de9 100644 --- a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst +++ b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst @@ -1,4 +1,4 @@ Speed up :class:`pathlib.PurePath` construction by handling arguments more -uniformly. When a :class:`pathlib.Path argument is supplied, +uniformly. When a :class:`pathlib.Path` argument is supplied, we use its string representation rather than joining its parts with :func:`os.path.join`. From 0ecae3d57bc60182dece7215ade883aa9bb1cd78 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 5 Mar 2023 23:08:45 +0000 Subject: [PATCH 6/7] Use `assertRaisesRegex()` to check TypeError message in test. --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ce031b94ec6e25..ca3c441aefb065 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -168,7 +168,7 @@ def test_constructor_common(self): def test_bytes(self): P = self.cls - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, "should be a str object or an os.PathLike object"): P(b'a') with self.assertRaises(TypeError): P(b'a', 'b') From fbfeedbae8f33c73ce7253023a22f7b29fadc17e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 5 Mar 2023 23:25:00 +0000 Subject: [PATCH 7/7] Improve TypeError message --- Lib/pathlib.py | 5 +++-- Lib/test/test_pathlib.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a3c5640815bc3c..3b7276b16c8a1f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -286,8 +286,9 @@ def _parse_parts(cls, parts): path = str(path) else: raise TypeError( - "argument should be a str object or an os.PathLike " - f"object where __fspath__ returns a str, not {type(path)!r}") + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index ca3c441aefb065..df9c1f6ba65deb 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -168,7 +168,9 @@ def test_constructor_common(self): def test_bytes(self): P = self.cls - with self.assertRaisesRegex(TypeError, "should be a str object or an os.PathLike object"): + message = (r"argument should be a str or an os\.PathLike object " + r"where __fspath__ returns a str, not 'bytes'") + with self.assertRaisesRegex(TypeError, message): P(b'a') with self.assertRaises(TypeError): P(b'a', 'b')