Skip to content

Commit 492d410

Browse files
bpo-42885: Optimize search for regular expressions starting with "\A" or "^" (GH-32021)
Affected functions are re.search(), re.split(), re.findall(), re.finditer() and re.sub().
1 parent 32e7715 commit 492d410

File tree

3 files changed

+25
-0
lines changed

3 files changed

+25
-0
lines changed

Lib/test/test_re.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66
import sre_compile
77
import string
8+
import time
89
import unittest
910
import warnings
1011
from re import Scanner
@@ -2038,6 +2039,20 @@ def test_bug_40736(self):
20382039
with self.assertRaisesRegex(TypeError, "got 'type'"):
20392040
re.search("x*", type)
20402041

2042+
def test_search_anchor_at_beginning(self):
2043+
s = 'x'*10**7
2044+
start = time.perf_counter()
2045+
for p in r'\Ay', r'^y':
2046+
self.assertIsNone(re.search(p, s))
2047+
self.assertEqual(re.split(p, s), [s])
2048+
self.assertEqual(re.findall(p, s), [])
2049+
self.assertEqual(list(re.finditer(p, s)), [])
2050+
self.assertEqual(re.sub(p, '', s), s)
2051+
t = time.perf_counter() - start
2052+
# Without the optimization this takes about 1 second on the author's computer.
2053+
# With the optimization it takes about 0.0003 seconds.
2054+
self.assertLess(t, 0.1)
2055+
20412056
def test_possessive_quantifiers(self):
20422057
"""Test Possessive Quantifiers
20432058
Test quantifiers of the form @+ for some repetition operator @,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Optimize :func:`re.search`, :func:`re.split`, :func:`re.findall`,
2+
:func:`re.finditer` and :func:`re.sub` for regular expressions starting with
3+
``\A`` or ``^``.

Modules/sre_lib.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,13 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
16931693
state->start = state->ptr = ptr;
16941694
status = SRE(match)(state, pattern, 1);
16951695
state->must_advance = 0;
1696+
if (status == 0 && pattern[0] == SRE_OP_AT &&
1697+
(pattern[1] == SRE_AT_BEGINNING ||
1698+
pattern[1] == SRE_AT_BEGINNING_STRING))
1699+
{
1700+
state->start = state->ptr = ptr = end;
1701+
return 0;
1702+
}
16961703
while (status == 0 && ptr < end) {
16971704
ptr++;
16981705
RESET_CAPTURE_GROUP();

0 commit comments

Comments
 (0)