Skip to content

Commit e75aebb

Browse files
authored
Update str related tests from 3.13.5 (#5953)
* Update str related tests from 3.13.5
* Apply RustPython patches
* Mark new failing tests
1 parent fef660e commit e75aebb

8 files changed

+464
-163
lines changed

Lib/test/string_tests.py

Lines changed: 83 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,20 @@
88
from collections import UserList
99
import random
1010

11+
1112
class Sequence:
1213
def __init__(self, seq='wxyz'): self.seq = seq
1314
def __len__(self): return len(self.seq)
1415
def __getitem__(self, i): return self.seq[i]
1516

16-
class BadSeq1(Sequence):
17-
def __init__(self): self.seq = [7, 'hello', 123]
18-
def __str__(self): return '{0} {1} {2}'.format(*self.seq)
19-
20-
class BadSeq2(Sequence):
21-
def __init__(self): self.seq = ['a', 'b', 'c']
22-
def __len__(self): return 8
2317

2418
class BaseTest:
2519
# These tests are for buffers of values (bytes) and not
2620
# specific to character interpretation, used for bytes objects
2721
# and various string implementations
2822

2923
# The type to be tested
30-
# Change in subclasses to change the behaviour of fixtesttype()
24+
# Change in subclasses to change the behaviour of fixtype()
3125
type2test = None
3226

3327
# Whether the "contained items" of the container are integers in
@@ -36,7 +30,7 @@ class BaseTest:
3630
contains_bytes = False
3731

3832
# All tests pass their arguments to the testing methods
39-
# as str objects. fixtesttype() can be used to propagate
33+
# as str objects. fixtype() can be used to propagate
4034
# these arguments to the appropriate type
4135
def fixtype(self, obj):
4236
if isinstance(obj, str):
@@ -160,6 +154,14 @@ def test_count(self):
160154
self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
161155
self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
162156

157+
# TODO: RUSTPYTHON; TypeError: Unexpected keyword argument count
158+
@unittest.expectedFailure
159+
def test_count_keyword(self):
160+
self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0))
161+
self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1))
162+
self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2))
163+
self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3))
164+
163165
def test_find(self):
164166
self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
165167
self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
@@ -327,11 +329,12 @@ def reference_find(p, s):
327329
for i in range(len(s)):
328330
if s.startswith(p, i):
329331
return i
332+
if p == '' and s == '':
333+
return 0
330334
return -1
331335

332-
rr = random.randrange
333-
choices = random.choices
334-
for _ in range(1000):
336+
def check_pattern(rr):
337+
choices = random.choices
335338
p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20)
336339
p = p0[:len(p0) - rr(10)] # pop off some characters
337340
left = ''.join(choices('abcdef', k=rr(2000)))
@@ -341,6 +344,49 @@ def reference_find(p, s):
341344
self.checkequal(reference_find(p, text),
342345
text, 'find', p)
343346

347+
rr = random.randrange
348+
for _ in range(1000):
349+
check_pattern(rr)
350+
351+
# Test that the empty string always works:
352+
check_pattern(lambda *args: 0)
353+
354+
def test_find_many_lengths(self):
355+
haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)]
356+
haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats]
357+
358+
needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)]
359+
needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats]
360+
361+
for n, haystack1 in haystacks:
362+
haystack2 = haystack1[:-1]
363+
for m, needle in needles:
364+
answer1 = 5 * (n - m) if m <= n else -1
365+
self.assertEqual(haystack1.find(needle), answer1, msg=(n,m))
366+
self.assertEqual(haystack2.find(needle), -1, msg=(n,m))
367+
368+
def test_adaptive_find(self):
369+
# This would be very slow for the naive algorithm,
370+
# but str.find() should be O(n + m).
371+
for N in 1000, 10_000, 100_000, 1_000_000:
372+
A, B = 'a' * N, 'b' * N
373+
haystack = A + A + B + A + A
374+
needle = A + B + B + A
375+
self.checkequal(-1, haystack, 'find', needle)
376+
self.checkequal(0, haystack, 'count', needle)
377+
self.checkequal(len(haystack), haystack + needle, 'find', needle)
378+
self.checkequal(1, haystack + needle, 'count', needle)
379+
380+
def test_find_with_memory(self):
381+
# Test the "Skip with memory" path in the two-way algorithm.
382+
for N in 1000, 3000, 10_000, 30_000:
383+
needle = 'ab' * N
384+
haystack = ('ab'*(N-1) + 'b') * 2
385+
self.checkequal(-1, haystack, 'find', needle)
386+
self.checkequal(0, haystack, 'count', needle)
387+
self.checkequal(len(haystack), haystack + needle, 'find', needle)
388+
self.checkequal(1, haystack + needle, 'count', needle)
389+
344390
def test_find_shift_table_overflow(self):
345391
"""When the table of 8-bit shifts overflows."""
346392
N = 2**8 + 100
@@ -724,6 +770,18 @@ def test_replace(self):
724770
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
725771
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
726772

773+
def test_replace_uses_two_way_maxcount(self):
774+
# Test that maxcount works in _two_way_count in fastsearch.h
775+
A, B = "A"*1000, "B"*1000
776+
AABAA = A + A + B + A + A
777+
ABBA = A + B + B + A
778+
self.checkequal(AABAA + ABBA,
779+
AABAA + ABBA, 'replace', ABBA, "ccc", 0)
780+
self.checkequal(AABAA + "ccc",
781+
AABAA + ABBA, 'replace', ABBA, "ccc", 1)
782+
self.checkequal(AABAA + "ccc",
783+
AABAA + ABBA, 'replace', ABBA, "ccc", 2)
784+
727785
@unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms")
728786
@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
729787
'only applies to 32-bit platforms')
@@ -734,8 +792,6 @@ def test_replace_overflow(self):
734792
self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
735793
self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
736794

737-
738-
# Python 3.9
739795
def test_removeprefix(self):
740796
self.checkequal('am', 'spam', 'removeprefix', 'sp')
741797
self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam')
@@ -754,7 +810,6 @@ def test_removeprefix(self):
754810
self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42)
755811
self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l"))
756812

757-
# Python 3.9
758813
def test_removesuffix(self):
759814
self.checkequal('sp', 'spam', 'removesuffix', 'am')
760815
self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam')
@@ -1053,7 +1108,7 @@ def test_splitlines(self):
10531108
self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
10541109

10551110

1056-
class CommonTest(BaseTest):
1111+
class StringLikeTest(BaseTest):
10571112
# This testcase contains tests that can be used in all
10581113
# stringlike classes. Currently this is str and UserString.
10591114

@@ -1084,11 +1139,6 @@ def test_capitalize_nonascii(self):
10841139
self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7',
10851140
'\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize')
10861141

1087-
1088-
class MixinStrUnicodeUserStringTest:
1089-
# additional tests that only work for
1090-
# stringlike objects, i.e. str, UserString
1091-
10921142
def test_startswith(self):
10931143
self.checkequal(True, 'hello', 'startswith', 'he')
10941144
self.checkequal(True, 'hello', 'startswith', 'hello')
@@ -1273,8 +1323,11 @@ def test_join(self):
12731323
self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
12741324
('a' * i,) * i)
12751325

1276-
#self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1())
1277-
self.checkequal('a b c', ' ', 'join', BadSeq2())
1326+
class LiesAboutLengthSeq(Sequence):
1327+
def __init__(self): self.seq = ['a', 'b', 'c']
1328+
def __len__(self): return 8
1329+
1330+
self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq())
12781331

12791332
self.checkraises(TypeError, ' ', 'join')
12801333
self.checkraises(TypeError, ' ', 'join', None)
@@ -1459,19 +1512,19 @@ def test_find_etc_raise_correct_error_messages(self):
14591512
# issue 11828
14601513
s = 'hello'
14611514
x = 'x'
1462-
self.assertRaisesRegex(TypeError, r'^find\(', s.find,
1515+
self.assertRaisesRegex(TypeError, r'^find\b', s.find,
14631516
x, None, None, None)
1464-
self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
1517+
self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind,
14651518
x, None, None, None)
1466-
self.assertRaisesRegex(TypeError, r'^index\(', s.index,
1519+
self.assertRaisesRegex(TypeError, r'^index\b', s.index,
14671520
x, None, None, None)
1468-
self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
1521+
self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex,
14691522
x, None, None, None)
1470-
self.assertRaisesRegex(TypeError, r'^count\(', s.count,
1523+
self.assertRaisesRegex(TypeError, r'^count\b', s.count,
14711524
x, None, None, None)
1472-
self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith,
1525+
self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith,
14731526
x, None, None, None)
1474-
self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
1527+
self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith,
14751528
x, None, None, None)
14761529

14771530
# issue #15534

0 commit comments

Comments
 (0)