From 7a582518ba3c9965622bc1b19199b0d3b857ae9e Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Mon, 15 May 2023 18:51:19 +0100
Subject: [PATCH 01/14] GH-102613: Simplify implementation of
 `pathlib.Path.glob()`

This commit replaces selector classes with selector functions. These
generators directly yield results rather calling through to their
successor. A new internal `Path._glob()` takes care to chain these
generators together, which simplifies the lazy algorithm and slightly
improves performance.
---
 Lib/pathlib.py                                | 170 ++++++------------
 ...-05-15-18-57-42.gh-issue-102613.YD9yx-.rst |   2 +
 2 files changed, 57 insertions(+), 115 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index ef7c47c9e775e4..9997d6c515b160 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -54,6 +54,7 @@ def _ignore_error(exception):
             getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
 
 
+@functools.lru_cache()
 def _is_case_sensitive(flavour):
     return flavour.normcase('Aa') == 'Aa'
 
@@ -62,137 +63,51 @@ def _is_case_sensitive(flavour):
 #
 
 @functools.lru_cache()
-def _make_selector(pattern_parts, flavour, case_sensitive):
-    pat = pattern_parts[0]
-    if not pat:
-        return _TerminatingSelector()
-    if pat == '**':
-        child_parts_idx = 1
-        while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == '**':
-            child_parts_idx += 1
-        child_parts = pattern_parts[child_parts_idx:]
-        if '**' in child_parts:
-            cls = _DoubleRecursiveWildcardSelector
-        else:
-            cls = _RecursiveWildcardSelector
-    else:
-        child_parts = pattern_parts[1:]
-        if pat == '..':
-            cls = _ParentSelector
-        elif '**' in pat:
-            raise ValueError("Invalid pattern: '**' can only be an entire path component")
-        else:
-            cls = _WildcardSelector
-    return cls(pat, child_parts, flavour, case_sensitive)
-
-
-class _Selector:
-    """A selector matches a specific glob pattern part against the children
-    of a given path."""
-
-    def __init__(self, child_parts, flavour, case_sensitive):
-        self.child_parts = child_parts
-        if child_parts:
-            self.successor = _make_selector(child_parts, flavour, case_sensitive)
-            self.dironly = True
-        else:
-            self.successor = _TerminatingSelector()
-            self.dironly = False
-
-    def select_from(self, parent_path):
-        """Iterate over all child paths of `parent_path` matched by this
-        selector.  This can contain parent_path itself."""
-        path_cls = type(parent_path)
-        scandir = path_cls._scandir
-        if not parent_path.is_dir():
-            return iter([])
-        return self._select_from(parent_path, scandir)
-
-
-class _TerminatingSelector:
-
-    def _select_from(self, parent_path, scandir):
-        yield parent_path
-
-
-class _ParentSelector(_Selector):
-
-    def __init__(self, name, child_parts, flavour, case_sensitive):
-        _Selector.__init__(self, child_parts, flavour, case_sensitive)
-
-    def _select_from(self,  parent_path, scandir):
-        path = parent_path._make_child_relpath('..')
-        for p in self.successor._select_from(path, scandir):
-            yield p
+def _compile_pattern_part(pattern_part, case_sensitive):
+    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
+    return re.compile(fnmatch.translate(pattern_part), flags=flags).fullmatch
 
 
-class _WildcardSelector(_Selector):
-
-    def __init__(self, pat, child_parts, flavour, case_sensitive):
-        _Selector.__init__(self, child_parts, flavour, case_sensitive)
-        if case_sensitive is None:
-            # TODO: evaluate case-sensitivity of each directory in _select_from()
-            case_sensitive = _is_case_sensitive(flavour)
-        flags = re.NOFLAG if case_sensitive else re.IGNORECASE
-        self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
-
-    def _select_from(self, parent_path, scandir):
+def _select_children(paths, dir_only, match):
+    for path in paths:
         try:
             # We must close the scandir() object before proceeding to
             # avoid exhausting file descriptors when globbing deep trees.
-            with scandir(parent_path) as scandir_it:
+            with path._scandir() as scandir_it:
                 entries = list(scandir_it)
         except OSError:
             pass
         else:
             for entry in entries:
-                if self.dironly:
+                if dir_only:
                     try:
                         if not entry.is_dir():
                             continue
                     except OSError:
                         continue
                 name = entry.name
-                if self.match(name):
-                    path = parent_path._make_child_relpath(name)
-                    for p in self.successor._select_from(path, scandir):
-                        yield p
-
+                if match(name):
+                    yield path._make_child_relpath(name)
 
-class _RecursiveWildcardSelector(_Selector):
 
-    def __init__(self, pat, child_parts, flavour, case_sensitive):
-        _Selector.__init__(self, child_parts, flavour, case_sensitive)
-
-    def _iterate_directories(self, parent_path):
-        yield parent_path
-        for dirpath, dirnames, _ in parent_path.walk():
+def _select_recursive(paths):
+    for path in paths:
+        yield path
+        for dirpath, dirnames, _ in path.walk():
             for dirname in dirnames:
                 yield dirpath._make_child_relpath(dirname)
 
-    def _select_from(self, parent_path, scandir):
-        successor_select = self.successor._select_from
-        for starting_point in self._iterate_directories(parent_path):
-            for p in successor_select(starting_point, scandir):
-                yield p
-
 
-class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
-    """
-    Like _RecursiveWildcardSelector, but also de-duplicates results from
-    successive selectors. This is necessary if the pattern contains
-    multiple non-adjacent '**' segments.
-    """
-
-    def _select_from(self, parent_path, scandir):
-        yielded = set()
-        try:
-            for p in super()._select_from(parent_path, scandir):
-                if p not in yielded:
-                    yield p
-                    yielded.add(p)
-        finally:
-            yielded.clear()
+def _select_unique(paths):
+    yielded = set()
+    try:
+        for path in paths:
+            raw_path = path._raw_path
+            if raw_path not in yielded:
+                yield path
+                yielded.add(raw_path)
+    finally:
+        yielded.clear()
 
 
 #
@@ -996,9 +911,7 @@ def glob(self, pattern, *, case_sensitive=None):
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
-        selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
-        for p in selector.select_from(self):
-            yield p
+        return self._glob(pattern_parts, case_sensitive)
 
     def rglob(self, pattern, *, case_sensitive=None):
         """Recursively yield all existing files (of any kind, including
@@ -1011,9 +924,36 @@ def rglob(self, pattern, *, case_sensitive=None):
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
-        selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
-        for p in selector.select_from(self):
-            yield p
+        return self._glob(['**'] + pattern_parts, case_sensitive)
+
+    def _glob(self, pattern_parts, case_sensitive):
+        if case_sensitive is None:
+            # TODO: evaluate case-sensitivity of each directory in _select_children().
+            case_sensitive = _is_case_sensitive(self._flavour)
+        paths = iter([self] if self.is_dir() else [])
+        deduplicate = False
+        part_idx = 0
+        while part_idx < len(pattern_parts):
+            part = pattern_parts[part_idx]
+            part_idx += 1
+            if part == '':
+                pass
+            elif part == '..':
+                paths = (path._make_child_relpath('..') for path in paths)
+            elif part == '**':
+                while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
+                    part_idx += 1
+                paths = _select_recursive(paths)
+                if deduplicate:
+                    paths = _select_unique(paths)
+                deduplicate = True
+            elif '**' in part:
+                raise ValueError("Invalid pattern: '**' can only be an entire path component")
+            else:
+                dir_only = part_idx < len(pattern_parts)
+                match = _compile_pattern_part(part, case_sensitive)
+                paths = _select_children(paths, dir_only, match)
+        return paths
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
diff --git a/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst b/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst
new file mode 100644
index 00000000000000..f9de075112bac3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst
@@ -0,0 +1,2 @@
+Improve performance of :meth:`pathlib.Path.glob` by simplifying its
+implementation.

From d5b1836733e8400068d441f02dfe10c5dcbcb3e4 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Mon, 15 May 2023 19:37:14 +0100
Subject: [PATCH 02/14] Speed up matching `*`

---
 Lib/pathlib.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 9997d6c515b160..272ca2d7bf8f2c 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -64,6 +64,8 @@ def _is_case_sensitive(flavour):
 
 @functools.lru_cache()
 def _compile_pattern_part(pattern_part, case_sensitive):
+    if pattern_part == '*':
+        return None
     flags = re.NOFLAG if case_sensitive else re.IGNORECASE
     return re.compile(fnmatch.translate(pattern_part), flags=flags).fullmatch
 
@@ -86,7 +88,7 @@ def _select_children(paths, dir_only, match):
                     except OSError:
                         continue
                 name = entry.name
-                if match(name):
+                if match is None or match(name):
                     yield path._make_child_relpath(name)
 
 

From d5c86c6e003b529d9c367815615c7644d2f7d5f1 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Mon, 15 May 2023 19:47:32 +0100
Subject: [PATCH 03/14] Add comments, docstrings.

---
 Lib/pathlib.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 272ca2d7bf8f2c..e13229081b163e 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -64,6 +64,8 @@ def _is_case_sensitive(flavour):
 
 @functools.lru_cache()
 def _compile_pattern_part(pattern_part, case_sensitive):
+    """Compile given glob pattern to a re.Pattern object (observing case
+    sensitivity), or None if the pattern should match everything."""
     if pattern_part == '*':
         return None
     flags = re.NOFLAG if case_sensitive else re.IGNORECASE
@@ -71,6 +73,7 @@ def _compile_pattern_part(pattern_part, case_sensitive):
 
 
 def _select_children(paths, dir_only, match):
+    """Yield direct children of given paths, filtering by name and type."""
     for path in paths:
         try:
             # We must close the scandir() object before proceeding to
@@ -93,6 +96,7 @@ def _select_children(paths, dir_only, match):
 
 
 def _select_recursive(paths):
+    """Yield given paths and all their subdirectories, recursively."""
     for path in paths:
         yield path
         for dirpath, dirnames, _ in path.walk():
@@ -101,6 +105,7 @@ def _select_recursive(paths):
 
 
 def _select_unique(paths):
+    """Yields the given paths, filtering out duplicates."""
     yielded = set()
     try:
         for path in paths:
@@ -939,13 +944,16 @@ def _glob(self, pattern_parts, case_sensitive):
             part = pattern_parts[part_idx]
             part_idx += 1
             if part == '':
+                # Trailing slash.
                 pass
             elif part == '..':
                 paths = (path._make_child_relpath('..') for path in paths)
             elif part == '**':
+                # Collapse adjacent '**' segments.
                 while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
                     part_idx += 1
                 paths = _select_recursive(paths)
+                # De-duplicate if we've already seen a '**' segment.
                 if deduplicate:
                     paths = _select_unique(paths)
                 deduplicate = True

From 40056194ebaac26f30c917bf32b34eacfee99d4a Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Wed, 31 May 2023 19:01:37 +0100
Subject: [PATCH 04/14] Add support for matching files recursively.

---
 Lib/pathlib.py | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index a72fdcde967d6c..6d624caf2cb6e7 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -158,15 +158,18 @@ def _select_children(paths, dir_only, follow_symlinks, match):
                     yield path._make_child_relpath(name)
 
 
-def _select_recursive(paths, follow_symlinks):
+def _select_recursive(paths, dir_only, follow_symlinks):
     """Yield given paths and all their subdirectories, recursively."""
     if follow_symlinks is None:
         follow_symlinks = False
     for path in paths:
         yield path
-        for dirpath, dirnames, _ in path.walk(follow_symlinks=follow_symlinks):
+        for dirpath, dirnames, filenames in path.walk(follow_symlinks=follow_symlinks):
             for dirname in dirnames:
                 yield dirpath._make_child_relpath(dirname)
+            if not dir_only:
+                for filename in filenames:
+                    yield dirpath._make_child_relpath(filename)
 
 
 def _select_unique(paths):
@@ -994,14 +997,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         kind, including directories) matching the given relative pattern.
         """
         sys.audit("pathlib.Path.glob", self, pattern)
-        if not pattern:
-            raise ValueError("Unacceptable pattern: {!r}".format(pattern))
-        drv, root, pattern_parts = self._parse_path(pattern)
-        if drv or root:
-            raise NotImplementedError("Non-relative patterns are unsupported")
-        if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
-            pattern_parts.append('')
-        return self._glob(pattern_parts, case_sensitive, follow_symlinks)
+        return self._glob(pattern, case_sensitive, follow_symlinks)
 
     def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         """Recursively yield all existing files (of any kind, including
@@ -1009,14 +1005,23 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
         this subtree.
         """
         sys.audit("pathlib.Path.rglob", self, pattern)
-        drv, root, pattern_parts = self._parse_path(pattern)
-        if drv or root:
+        return self._glob(f'**/{pattern}', case_sensitive, follow_symlinks)
+
+    def _glob(self, pattern, case_sensitive, follow_symlinks):
+        path_pattern = self.with_segments(pattern)
+        if path_pattern.drive or path_pattern.root:
             raise NotImplementedError("Non-relative patterns are unsupported")
-        if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
+        elif not path_pattern._tail:
+            raise ValueError("Unacceptable pattern: {!r}".format(pattern))
+
+        pattern_parts = list(path_pattern._tail)
+        if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
+            # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
+            pattern_parts.append('')
+        if pattern_parts[-1] == '**':
+            # GH-102613: '**' only matches directories. Add trailing slash.
             pattern_parts.append('')
-        return self._glob(['**'] + pattern_parts, case_sensitive, follow_symlinks)
 
-    def _glob(self, pattern_parts, case_sensitive, follow_symlinks):
         if case_sensitive is None:
             # TODO: evaluate case-sensitivity of each directory in _select_children().
             case_sensitive = _is_case_sensitive(self._flavour)
@@ -1035,7 +1040,8 @@ def _glob(self, pattern_parts, case_sensitive, follow_symlinks):
                 # Collapse adjacent '**' segments.
                 while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
                     part_idx += 1
-                paths = _select_recursive(paths, follow_symlinks)
+                dir_only = part_idx < len(pattern_parts)
+                paths = _select_recursive(paths, dir_only, follow_symlinks)
                 # De-duplicate if we've already seen a '**' segment.
                 if deduplicate:
                     paths = _select_unique(paths)

From 9401d36e63ae1741fd3b03d0c40e8b92995d4d88 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Wed, 31 May 2023 19:28:05 +0100
Subject: [PATCH 05/14] Implement walk-and-match algorithm.

---
 Lib/pathlib.py | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 6d624caf2cb6e7..4b98133d0a54af 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -978,18 +978,23 @@ def _scandir(self):
 
     def _make_child_relpath(self, name):
         path_str = str(self)
+        lines_str = self._lines
         tail = self._tail
         if tail:
             path_str = f'{path_str}{self._flavour.sep}{name}'
+            lines_str = f'{lines_str}\n{name}'
         elif path_str != '.':
             path_str = f'{path_str}{name}'
+            lines_str = f'{lines_str}{name}'
         else:
             path_str = name
+            lines_str = name
         path = self.with_segments(path_str)
         path._str = path_str
         path._drv = self.drive
         path._root = self.root
         path._tail_cached = tail + [name]
+        path._lines_cached = lines_str
         return path
 
     def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
@@ -1025,8 +1030,17 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
         if case_sensitive is None:
             # TODO: evaluate case-sensitivity of each directory in _select_children().
             case_sensitive = _is_case_sensitive(self._flavour)
+
+        # If symlinks are handled consistently, and the pattern does not
+        # contain '..' components, then we can use a 'walk-and-match' strategy
+        # when expanding '**' wildcards. When a '**' wildcard is encountered,
+        # all following pattern parts are immediately consumed and used to
+        # build a `re.Pattern` object. This pattern is used to filter the
+        # recursive walk. As a result, pattern parts following a '**' wildcard
+        # do not perform any filesystem access, which can be much faster!
+        filter_paths_supported = follow_symlinks is not None and '..' not in pattern_parts
+        deduplicate_paths = False
         paths = iter([self] if self.is_dir() else [])
-        deduplicate = False
         part_idx = 0
         while part_idx < len(pattern_parts):
             part = pattern_parts[part_idx]
@@ -1037,15 +1051,29 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
             elif part == '..':
                 paths = (path._make_child_relpath('..') for path in paths)
             elif part == '**':
-                # Collapse adjacent '**' segments.
-                while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
-                    part_idx += 1
+                filter_paths = False
+                if filter_paths_supported:
+                    # Consume remaining path components, except trailing slash.
+                    while part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
+                        filter_paths = True
+                        part_idx += 1
+                else:
+                    # Consume adjacent '**' components.
+                    while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
+                        part_idx += 1
+
                 dir_only = part_idx < len(pattern_parts)
                 paths = _select_recursive(paths, dir_only, follow_symlinks)
-                # De-duplicate if we've already seen a '**' segment.
-                if deduplicate:
+
+                if filter_paths:
+                    # Filter out paths that don't match pattern.
+                    pattern_lines = self.joinpath(path_pattern)._lines
+                    match = _compile_pattern_lines(pattern_lines, case_sensitive).match
+                    paths = (path for path in paths if match(path._lines))
+                elif deduplicate_paths:
+                    # De-duplicate if we've already seen a '**' component.
                     paths = _select_unique(paths)
-                deduplicate = True
+                deduplicate_paths = True
             elif '**' in part:
                 raise ValueError("Invalid pattern: '**' can only be an entire path component")
             else:

From b217587651fd53d2783ca918979f2165377ffb07 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Wed, 31 May 2023 20:56:03 +0100
Subject: [PATCH 06/14] Fix up docs, news blurb.

---
 Doc/library/pathlib.rst                              | 12 ++++++++----
 .../2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst   |  6 ++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst
index 67ef36890d5739..ad801d5d7cdc4b 100644
--- a/Doc/library/pathlib.rst
+++ b/Doc/library/pathlib.rst
@@ -917,6 +917,14 @@ call fails (for example because the path doesn't exist).
        PosixPath('setup.py'),
        PosixPath('test_pathlib.py')]
 
+   .. note::
+      Using the "``**``" pattern in large directory trees may consume
+      an inordinate amount of time.
+
+   .. tip::
+      Set *follow_symlinks* to ``True`` or ``False`` to improve performance
+      of recursive globbing.
+
    By default, or when the *case_sensitive* keyword-only argument is set to
    ``None``, this method matches paths using platform-specific casing rules:
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
@@ -927,10 +935,6 @@ call fails (for example because the path doesn't exist).
    wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
    ``False`` to treat all symlinks as files.
 
-   .. note::
-      Using the "``**``" pattern in large directory trees may consume
-      an inordinate amount of time.
-
    .. audit-event:: pathlib.Path.glob self,pattern pathlib.Path.glob
 
    .. versionchanged:: 3.11
diff --git a/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst b/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst
index f9de075112bac3..841a9e1e26d317 100644
--- a/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst
+++ b/Misc/NEWS.d/next/Library/2023-05-15-18-57-42.gh-issue-102613.YD9yx-.rst
@@ -1,2 +1,4 @@
-Improve performance of :meth:`pathlib.Path.glob` by simplifying its
-implementation.
+Improve performance of :meth:`pathlib.Path.glob` when expanding a pattern with
+a non-terminal "``**``" component by filtering walked paths through a regular
+expression, rather than calling :func:`os.scandir` more than once on each
+directory.

From de7e8570ae501759d7dabe7b3c3b0e4dd6447759 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Wed, 31 May 2023 20:57:55 +0100
Subject: [PATCH 07/14] Fix comment

---
 Lib/pathlib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 4b98133d0a54af..b7cb6385d370ce 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -1024,7 +1024,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
             # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
             pattern_parts.append('')
         if pattern_parts[-1] == '**':
-            # GH-102613: '**' only matches directories. Add trailing slash.
+            # GH-70303: '**' only matches directories. Add trailing slash.
             pattern_parts.append('')
 
         if case_sensitive is None:

From d1023c7c324d43e3a655b7748f76d9bd23680a17 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Wed, 31 May 2023 21:04:53 +0100
Subject: [PATCH 08/14] Fix handling of newlines in filenames.

---
 Lib/pathlib.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index b7cb6385d370ce..eebf84960ebc27 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -977,18 +977,20 @@ def _scandir(self):
         return os.scandir(self)
 
     def _make_child_relpath(self, name):
-        path_str = str(self)
+        sep = self._flavour.sep
+        lines_name = name.replace('\n', sep)
         lines_str = self._lines
+        path_str = str(self)
         tail = self._tail
         if tail:
-            path_str = f'{path_str}{self._flavour.sep}{name}'
-            lines_str = f'{lines_str}\n{name}'
+            path_str = f'{path_str}{sep}{name}'
+            lines_str = f'{lines_str}\n{lines_name}'
         elif path_str != '.':
             path_str = f'{path_str}{name}'
-            lines_str = f'{lines_str}{name}'
+            lines_str = f'{lines_str}{lines_name}'
         else:
             path_str = name
-            lines_str = name
+            lines_str = lines_name
         path = self.with_segments(path_str)
         path._str = path_str
         path._drv = self.drive

From ad33eece664095b3b990687d29da101a04499bfe Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 18:12:26 +0100
Subject: [PATCH 09/14] Speed up recursive selection

---
 Lib/pathlib.py | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index eebf84960ebc27..01d4e28b48efe1 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -133,15 +133,15 @@ def _compile_pattern_lines(pattern_lines, case_sensitive):
     return re.compile(''.join(parts), flags=flags)
 
 
-def _select_children(paths, dir_only, follow_symlinks, match):
+def _select_children(parent_paths, dir_only, follow_symlinks, match):
     """Yield direct children of given paths, filtering by name and type."""
     if follow_symlinks is None:
         follow_symlinks = True
-    for path in paths:
+    for parent_path in parent_paths:
         try:
             # We must close the scandir() object before proceeding to
             # avoid exhausting file descriptors when globbing deep trees.
-            with path._scandir() as scandir_it:
+            with parent_path._scandir() as scandir_it:
                 entries = list(scandir_it)
         except OSError:
             pass
@@ -155,21 +155,35 @@ def _select_children(paths, dir_only, follow_symlinks, match):
                         continue
                 name = entry.name
                 if match is None or match(name):
-                    yield path._make_child_relpath(name)
+                    yield parent_path._make_child_relpath(name)
 
 
-def _select_recursive(paths, dir_only, follow_symlinks):
+def _select_recursive(parent_paths, dir_only, follow_symlinks):
     """Yield given paths and all their subdirectories, recursively."""
     if follow_symlinks is None:
         follow_symlinks = False
-    for path in paths:
-        yield path
-        for dirpath, dirnames, filenames in path.walk(follow_symlinks=follow_symlinks):
-            for dirname in dirnames:
-                yield dirpath._make_child_relpath(dirname)
-            if not dir_only:
-                for filename in filenames:
-                    yield dirpath._make_child_relpath(filename)
+    for parent_path in parent_paths:
+        paths = [parent_path]
+        while paths:
+            path = paths.pop()
+            yield path
+            try:
+                # We must close the scandir() object before proceeding to
+                # avoid exhausting file descriptors when globbing deep trees.
+                with path._scandir() as scandir_it:
+                    entries = list(scandir_it)
+            except OSError:
+                pass
+            else:
+                for entry in entries:
+                    try:
+                        if entry.is_dir(follow_symlinks=follow_symlinks):
+                            paths.append(path._make_child_relpath(entry.name))
+                            continue
+                    except OSError:
+                        pass
+                    if not dir_only:
+                        yield path._make_child_relpath(entry.name)
 
 
 def _select_unique(paths):

From 064efdb452f1e0db040d1bda2b2faa9ef57df8c2 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 18:14:21 +0100
Subject: [PATCH 10/14] Exclude `self` from walk-and-match matching.

---
 Lib/pathlib.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 01d4e28b48efe1..300c836ae2b0c1 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -106,7 +106,11 @@ def _compile_pattern_lines(pattern_lines, case_sensitive):
     # Match the start of the path, or just after a path separator
     parts = ['^']
     for part in pattern_lines.splitlines(keepends=True):
-        if part == '**\n':
+        if part == '*\n':
+            part = r'.+\n'
+        elif part == '*':
+            part = r'.+'
+        elif part == '**\n':
             # '**/' component: we use '[\s\S]' rather than '.' so that path
             # separators (i.e. newlines) are matched. The trailing '^' ensures
             # we terminate after a path separator (i.e. on a new line).
@@ -1083,9 +1087,9 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
 
                 if filter_paths:
                     # Filter out paths that don't match pattern.
-                    pattern_lines = self.joinpath(path_pattern)._lines
-                    match = _compile_pattern_lines(pattern_lines, case_sensitive).match
-                    paths = (path for path in paths if match(path._lines))
+                    prefix_len = len(self._make_child_relpath('_')._lines) - 1
+                    match = _compile_pattern_lines(path_pattern._lines, case_sensitive).match
+                    paths = (path for path in paths if match(path._lines[prefix_len:]))
                 elif deduplicate_paths:
                     # De-duplicate if we've already seen a '**' component.
                     paths = _select_unique(paths)

From 14c6a587a22b2492438d1b5dbe90189bd8ee0c24 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 18:46:56 +0100
Subject: [PATCH 11/14] Optimize walk-and-match logic.

---
 Lib/pathlib.py | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 300c836ae2b0c1..dc23b7a65f89e6 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -1058,7 +1058,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
         # build a `re.Pattern` object. This pattern is used to filter the
         # recursive walk. As a result, pattern parts following a '**' wildcard
         # do not perform any filesystem access, which can be much faster!
-        filter_paths_supported = follow_symlinks is not None and '..' not in pattern_parts
+        filter_paths = follow_symlinks is not None and '..' not in pattern_parts
         deduplicate_paths = False
         paths = iter([self] if self.is_dir() else [])
         part_idx = 0
@@ -1071,26 +1071,22 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
             elif part == '..':
                 paths = (path._make_child_relpath('..') for path in paths)
             elif part == '**':
-                filter_paths = False
-                if filter_paths_supported:
-                    # Consume remaining path components, except trailing slash.
-                    while part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
-                        filter_paths = True
-                        part_idx += 1
-                else:
-                    # Consume adjacent '**' components.
-                    while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
-                        part_idx += 1
+                if filter_paths and part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
+                    dir_only = pattern_parts[-1] == ''
+                    paths = _select_recursive(paths, dir_only, follow_symlinks)
 
-                dir_only = part_idx < len(pattern_parts)
-                paths = _select_recursive(paths, dir_only, follow_symlinks)
-
-                if filter_paths:
                     # Filter out paths that don't match pattern.
                     prefix_len = len(self._make_child_relpath('_')._lines) - 1
                     match = _compile_pattern_lines(path_pattern._lines, case_sensitive).match
                     paths = (path for path in paths if match(path._lines[prefix_len:]))
-                elif deduplicate_paths:
+                    return paths
+
+                # Consume adjacent '**' components.
+                while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
+                    part_idx += 1
+                dir_only = part_idx < len(pattern_parts)
+                paths = _select_recursive(paths, dir_only, follow_symlinks)
+                if deduplicate_paths:
                     # De-duplicate if we've already seen a '**' component.
                     paths = _select_unique(paths)
                 deduplicate_paths = True

From 04720bdfb8e0df75991e0bfeb8da19bb8840eb62 Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 19:23:21 +0100
Subject: [PATCH 12/14] Consume adjacent '**' segments before considering use
 of matching.

---
 Lib/pathlib.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index dc23b7a65f89e6..89c7b1ebdf727f 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -1071,6 +1071,10 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
             elif part == '..':
                 paths = (path._make_child_relpath('..') for path in paths)
             elif part == '**':
+                # Consume adjacent '**' components.
+                while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
+                    part_idx += 1
+
                 if filter_paths and part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
                     dir_only = pattern_parts[-1] == ''
                     paths = _select_recursive(paths, dir_only, follow_symlinks)
@@ -1081,9 +1085,6 @@ def _glob(self, pattern, case_sensitive, follow_symlinks):
                     paths = (path for path in paths if match(path._lines[prefix_len:]))
                     return paths
 
-                # Consume adjacent '**' components.
-                while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
-                    part_idx += 1
                 dir_only = part_idx < len(pattern_parts)
                 paths = _select_recursive(paths, dir_only, follow_symlinks)
                 if deduplicate_paths:

From 9c6b44f54e56aa07edd1b03c29cc5c4e1de0681a Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 19:57:51 +0100
Subject: [PATCH 13/14] Add some more tests for complex patterns.

---
 Lib/test/test_pathlib.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 076ace3d930857..75d104b7b79020 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1899,6 +1899,17 @@ def _check(path, glob, expected):
         _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
         _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
         _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
+        _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/.."])
+        _check(p, "dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
+                                 "dirC", "dirC/dirD", "dirE"])
+        _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..",
+                                 "dirC/..", "dirC/dirD/..", "dirE/.."])
+        _check(p, "dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"])
+        _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."])
+        _check(p, "dir*/**/fileC", ["dirC/fileC"])
+        _check(p, "dir*/*/../**/fileC", ["dirA/linkC/../dirC/fileC", "dirC/dirD/../fileC"])
+        _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
+        _check(p, "*/dirD/**/", ["dirC/dirD"])
 
     @os_helper.skip_unless_symlink
     def test_glob_no_follow_symlinks_common(self):
@@ -1913,6 +1924,15 @@ def _check(path, glob, expected):
         _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
         _check(p, "*/fileB", ["dirB/fileB"])
         _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
+        _check(p, "dir*/*/..", ["dirC/dirD/.."])
+        _check(p, "dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
+        _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."])
+        _check(p, "dir*/*/**/", ["dirC/dirD"])
+        _check(p, "dir*/*/**/..", ["dirC/dirD/.."])
+        _check(p, "dir*/**/fileC", ["dirC/fileC"])
+        _check(p, "dir*/*/../**/fileC", ["dirC/dirD/../fileC"])
+        _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
+        _check(p, "*/dirD/**/", ["dirC/dirD"])
 
     def test_rglob_common(self):
         def _check(glob, expected):

From 4cfb83670d43ed62956c0c3b6c0473803a25993d Mon Sep 17 00:00:00 2001
From: barneygale <barney.gale@gmail.com>
Date: Thu, 1 Jun 2023 20:35:45 +0100
Subject: [PATCH 14/14] Drop test case that doesn't work on Windows.

`..` components are resolved lexically, rather than after symlinks.
---
 Lib/test/test_pathlib.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 75d104b7b79020..dc94e337ee65e6 100644
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py
@@ -1907,7 +1907,6 @@ def _check(path, glob, expected):
         _check(p, "dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"])
         _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."])
         _check(p, "dir*/**/fileC", ["dirC/fileC"])
-        _check(p, "dir*/*/../**/fileC", ["dirA/linkC/../dirC/fileC", "dirC/dirD/../fileC"])
         _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
         _check(p, "*/dirD/**/", ["dirC/dirD"])
 
@@ -1930,7 +1929,6 @@ def _check(path, glob, expected):
         _check(p, "dir*/*/**/", ["dirC/dirD"])
         _check(p, "dir*/*/**/..", ["dirC/dirD/.."])
         _check(p, "dir*/**/fileC", ["dirC/fileC"])
-        _check(p, "dir*/*/../**/fileC", ["dirC/dirD/../fileC"])
         _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
         _check(p, "*/dirD/**/", ["dirC/dirD"])