diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index fee59bf73f..73acb1fe8d 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,12 +16,6 @@ __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -# Build a thread-safe incrementing counter to help create unique regexp group -# names across calls. -from itertools import count -_nextgroupnum = count().__next__ -del count - def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -41,7 +35,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) -@functools.lru_cache(maxsize=256, typed=True) +@functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): pat_str = str(pat, 'ISO-8859-1') @@ -84,6 +78,11 @@ def translate(pat): """ STAR = object() + parts = _translate(pat, STAR, '.') + return _join_translated_parts(parts, STAR) + + +def _translate(pat, STAR, QUESTION_MARK): res = [] add = res.append i, n = 0, len(pat) @@ -95,7 +94,7 @@ def translate(pat): if (not res) or res[-1] is not STAR: add(STAR) elif c == '?': - add('.') + add(QUESTION_MARK) elif c == '[': j = i if j < n and pat[j] == '!': @@ -152,9 +151,11 @@ def translate(pat): else: add(re.escape(c)) assert i == n + return res + +def _join_translated_parts(inp, STAR): # Deal with STARs. - inp = res res = [] add = res.append i, n = 0, len(inp) @@ -165,17 +166,10 @@ def translate(pat): # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. - # We can't spell that directly, but can trick it into working by matching - # .*?fixed - # in a lookahead assertion, save the matched part in a group, then - # consume that group via a backreference. If the overall match fails, - # the lookahead assertion won't try alternatives. So the translation is: - # (?=(?P.*?fixed))(?P=name) - # Group names are created as needed: g0, g1, g2, ... - # The numbers are obtained from _nextgroupnum() to ensure they're unique - # across calls and across threads. This is because people rely on the - # undocumented ability to join multiple translate() results together via - # "|" to build large regexps matching "one of many" shell patterns. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. while i < n: assert inp[i] is STAR i += 1 @@ -192,8 +186,7 @@ def translate(pat): add(".*") add(fixed) else: - groupnum = _nextgroupnum() - add(f"(?=(?P.*?{fixed}))(?P=g{groupnum})") + add(f"(?>.*?{fixed})") assert i == n res = "".join(res) return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 092cd56285..b977b8f8eb 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -71,7 +71,7 @@ def test_fnmatchcase(self): check('usr/bin', 'usr\\bin', False, fnmatchcase) check('usr\\bin', 'usr\\bin', True, fnmatchcase) - @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON") + @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON') def test_bytes(self): self.check_match(b'test', b'te*') self.check_match(b'test\xff', b'te*\xff') @@ -239,17 +239,9 @@ def test_translate(self): self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') # fancy translation to prevent exponential-time match failure t = translate('**a*a****a') - digits = re.findall(r'\d+', t) - self.assertEqual(len(digits), 4) - self.assertEqual(digits[0], digits[1]) - self.assertEqual(digits[2], digits[3]) - g1 = f"g{digits[0]}" # e.g., group name "g4" - g2 = f"g{digits[2]}" # e.g., group name "g5" - self.assertEqual(t, - fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z') + self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z') # and try pasting multiple translate results - it's an undocumented - # feature that this works; all the pain of generating unique group - # names across calls exists to support this + # feature that this works r1 = translate('**a**a**a*') r2 = translate('**b**b**b*') r3 = translate('*c*c*c*') diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 8a426e338a..1fe2aef39d 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -196,6 +196,7 @@ def test_rmtree_works_on_bytes(self): self.assertIsInstance(victim, bytes) shutil.rmtree(victim) + @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; flaky') @os_helper.skip_unless_symlink def test_rmtree_fails_on_symlink_onerror(self): tmp = self.mkdtemp() @@ -1477,7 +1478,7 @@ def test_dont_copy_file_onto_link_to_itself(self): finally: shutil.rmtree(TESTFN, ignore_errors=True) - @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON') + @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON; AssertionError: SameFileError not raised for copyfile') @os_helper.skip_unless_symlink def test_dont_copy_file_onto_symlink_to_itself(self): # bug 851123.