Skip to content

Commit 882cb79

Browse files
gh-56166: Deprecate passing confusing positional arguments in re functions (#107778)
Deprecate passing the optional arguments maxsplit, count and flags as positional arguments in the module-level functions re.split(), re.sub() and re.subn(). They should only be passed by keyword.
1 parent fb8fe37 commit 882cb79

File tree

5 files changed (+153 -19 lines changed)

Doc/library/re.rst

+13-11
Original file line number | Diff line number | Diff line change
@@ -898,7 +898,7 @@ Functions
898898
['Words', 'words', 'words', '']
899899
>>> re.split(r'(\W+)', 'Words, words, words.')
900900
['Words', ', ', 'words', ', ', 'words', '.', '']
901-
>>> re.split(r'\W+', 'Words, words, words.', 1)
901+
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
902902
['Words', 'words, words.']
903903
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
904904
['0', '3', '9']
@@ -929,6 +929,11 @@ Functions
929929
.. versionchanged:: 3.7
930930
Added support of splitting on a pattern that could match an empty string.
931931

932+
.. deprecated:: 3.13
933+
Passing *maxsplit* and *flags* as positional arguments is deprecated.
934+
In future Python versions they will be
935+
:ref:`keyword-only parameters <keyword-only_parameter>`.
936+
932937

933938
.. function:: findall(pattern, string, flags=0)
934939

@@ -1027,8 +1032,6 @@ Functions
10271032
.. versionchanged:: 3.7
10281033
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
10291034
now are errors.
1030-
1031-
.. versionchanged:: 3.7
10321035
Empty matches for the pattern are replaced when adjacent to a previous
10331036
non-empty match.
10341037

@@ -1037,18 +1040,17 @@ Functions
10371040
In :class:`bytes` replacement strings, group *name* can only contain bytes
10381041
in the ASCII range (``b'\x00'``-``b'\x7f'``).
10391042

1043+
.. deprecated:: 3.13
1044+
Passing *count* and *flags* as positional arguments is deprecated.
1045+
In future Python versions they will be
1046+
:ref:`keyword-only parameters <keyword-only_parameter>`.
1047+
10401048

10411049
.. function:: subn(pattern, repl, string, count=0, flags=0)
10421050

10431051
Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
10441052
number_of_subs_made)``.
10451053

1046-
.. versionchanged:: 3.1
1047-
Added the optional flags argument.
1048-
1049-
.. versionchanged:: 3.5
1050-
Unmatched groups are replaced with an empty string.
1051-
10521054

10531055
.. function:: escape(pattern)
10541056

@@ -1656,7 +1658,7 @@ because the address has spaces, our splitting pattern, in it:
16561658
.. doctest::
16571659
:options: +NORMALIZE_WHITESPACE
16581660

1659-
>>> [re.split(":? ", entry, 3) for entry in entries]
1661+
>>> [re.split(":? ", entry, maxsplit=3) for entry in entries]
16601662
[['Ross', 'McFluff', '834.345.1254', '155 Elm Street'],
16611663
['Ronald', 'Heathmore', '892.345.3428', '436 Finley Avenue'],
16621664
['Frank', 'Burger', '925.541.7625', '662 South Dogwood Way'],
@@ -1669,7 +1671,7 @@ house number from the street name:
16691671
.. doctest::
16701672
:options: +NORMALIZE_WHITESPACE
16711673

1672-
>>> [re.split(":? ", entry, 4) for entry in entries]
1674+
>>> [re.split(":? ", entry, maxsplit=4) for entry in entries]
16731675
[['Ross', 'McFluff', '834.345.1254', '155', 'Elm Street'],
16741676
['Ronald', 'Heathmore', '892.345.3428', '436', 'Finley Avenue'],
16751677
['Frank', 'Burger', '925.541.7625', '662', 'South Dogwood Way'],

Doc/whatsnew/3.13.rst

+7
Original file line number | Diff line number | Diff line change
@@ -832,6 +832,13 @@ Porting to Python 3.13
832832
Deprecated
833833
----------
834834

835+
* Passing optional arguments *maxsplit*, *count* and *flags* in module-level
836+
functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional
837+
arguments is now deprecated.
838+
In future Python versions these parameters will be
839+
:ref:`keyword-only <keyword-only_parameter>`.
840+
(Contributed by Serhiy Storchaka in :gh:`56166`.)
841+
835842
* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
836843
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
837844
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.

Lib/re/__init__.py

+64-3
Original file line number | Diff line number | Diff line change
@@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
175175
a Match object, or None if no match was found."""
176176
return _compile(pattern, flags).search(string)
177177

178-
def sub(pattern, repl, string, count=0, flags=0):
178+
class _ZeroSentinel(int):
179+
pass
180+
_zero_sentinel = _ZeroSentinel()
181+
182+
def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
179183
"""Return the string obtained by replacing the leftmost
180184
non-overlapping occurrences of the pattern in string by the
181185
replacement repl. repl can be either a string or a callable;
182186
if a string, backslash escapes in it are processed. If it is
183187
a callable, it's passed the Match object and must return
184188
a replacement string to be used."""
189+
if args:
190+
if count is not _zero_sentinel:
191+
raise TypeError("sub() got multiple values for argument 'count'")
192+
count, *args = args
193+
if args:
194+
if flags is not _zero_sentinel:
195+
raise TypeError("sub() got multiple values for argument 'flags'")
196+
flags, *args = args
197+
if args:
198+
raise TypeError("sub() takes from 3 to 5 positional arguments "
199+
"but %d were given" % (5 + len(args)))
200+
201+
import warnings
202+
warnings.warn(
203+
"'count' is passed as positional argument",
204+
DeprecationWarning, stacklevel=2
205+
)
206+
185207
return _compile(pattern, flags).sub(repl, string, count)
208+
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
186209

187-
def subn(pattern, repl, string, count=0, flags=0):
210+
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
188211
"""Return a 2-tuple containing (new_string, number).
189212
new_string is the string obtained by replacing the leftmost
190213
non-overlapping occurrences of the pattern in the source
@@ -193,17 +216,55 @@ def subn(pattern, repl, string, count=0, flags=0):
193216
callable; if a string, backslash escapes in it are processed.
194217
If it is a callable, it's passed the Match object and must
195218
return a replacement string to be used."""
219+
if args:
220+
if count is not _zero_sentinel:
221+
raise TypeError("subn() got multiple values for argument 'count'")
222+
count, *args = args
223+
if args:
224+
if flags is not _zero_sentinel:
225+
raise TypeError("subn() got multiple values for argument 'flags'")
226+
flags, *args = args
227+
if args:
228+
raise TypeError("subn() takes from 3 to 5 positional arguments "
229+
"but %d were given" % (5 + len(args)))
230+
231+
import warnings
232+
warnings.warn(
233+
"'count' is passed as positional argument",
234+
DeprecationWarning, stacklevel=2
235+
)
236+
196237
return _compile(pattern, flags).subn(repl, string, count)
238+
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'
197239

198-
def split(pattern, string, maxsplit=0, flags=0):
240+
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
199241
"""Split the source string by the occurrences of the pattern,
200242
returning a list containing the resulting substrings. If
201243
capturing parentheses are used in pattern, then the text of all
202244
groups in the pattern are also returned as part of the resulting
203245
list. If maxsplit is nonzero, at most maxsplit splits occur,
204246
and the remainder of the string is returned as the final element
205247
of the list."""
248+
if args:
249+
if maxsplit is not _zero_sentinel:
250+
raise TypeError("split() got multiple values for argument 'maxsplit'")
251+
maxsplit, *args = args
252+
if args:
253+
if flags is not _zero_sentinel:
254+
raise TypeError("split() got multiple values for argument 'flags'")
255+
flags, *args = args
256+
if args:
257+
raise TypeError("split() takes from 2 to 4 positional arguments "
258+
"but %d were given" % (4 + len(args)))
259+
260+
import warnings
261+
warnings.warn(
262+
"'maxsplit' is passed as positional argument",
263+
DeprecationWarning, stacklevel=2
264+
)
265+
206266
return _compile(pattern, flags).split(string, maxsplit)
267+
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'
207268

208269
def findall(pattern, string, flags=0):
209270
"""Return a list of all non-overlapping matches in the string.

Lib/test/test_re.py

+66-5
Original file line number | Diff line number | Diff line change
@@ -127,8 +127,10 @@ def test_basic_re_sub(self):
127127
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
128128
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
129129
'9.3 -3 24x100y')
130-
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
131-
'9.3 -3 23x99y')
130+
with self.assertWarns(DeprecationWarning) as w:
131+
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
132+
'9.3 -3 23x99y')
133+
self.assertEqual(w.filename, __file__)
132134
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
133135
'9.3 -3 23x99y')
134136

@@ -235,9 +237,42 @@ def test_sub_template_numeric_escape(self):
235237

236238
def test_qualified_re_sub(self):
237239
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
238-
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
240+
with self.assertWarns(DeprecationWarning) as w:
241+
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
242+
self.assertEqual(w.filename, __file__)
239243
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
240244

245+
with self.assertRaisesRegex(TypeError,
246+
r"sub\(\) got multiple values for argument 'count'"):
247+
re.sub('a', 'b', 'aaaaa', 1, count=1)
248+
with self.assertRaisesRegex(TypeError,
249+
r"sub\(\) got multiple values for argument 'flags'"):
250+
re.sub('a', 'b', 'aaaaa', 1, 0, flags=0)
251+
with self.assertRaisesRegex(TypeError,
252+
r"sub\(\) takes from 3 to 5 positional arguments but 6 "
253+
r"were given"):
254+
re.sub('a', 'b', 'aaaaa', 1, 0, 0)
255+
256+
def test_misuse_flags(self):
257+
with self.assertWarns(DeprecationWarning) as w:
258+
result = re.sub('a', 'b', 'aaaaa', re.I)
259+
self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
260+
self.assertEqual(str(w.warning),
261+
"'count' is passed as positional argument")
262+
self.assertEqual(w.filename, __file__)
263+
with self.assertWarns(DeprecationWarning) as w:
264+
result = re.subn("b*", "x", "xyz", re.I)
265+
self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
266+
self.assertEqual(str(w.warning),
267+
"'count' is passed as positional argument")
268+
self.assertEqual(w.filename, __file__)
269+
with self.assertWarns(DeprecationWarning) as w:
270+
result = re.split(":", ":a:b::c", re.I)
271+
self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
272+
self.assertEqual(str(w.warning),
273+
"'maxsplit' is passed as positional argument")
274+
self.assertEqual(w.filename, __file__)
275+
241276
def test_bug_114660(self):
242277
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
243278
'hello there')
@@ -344,9 +379,22 @@ def test_re_subn(self):
344379
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
345380
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
346381
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
347-
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
382+
with self.assertWarns(DeprecationWarning) as w:
383+
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
384+
self.assertEqual(w.filename, __file__)
348385
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))
349386

387+
with self.assertRaisesRegex(TypeError,
388+
r"subn\(\) got multiple values for argument 'count'"):
389+
re.subn('a', 'b', 'aaaaa', 1, count=1)
390+
with self.assertRaisesRegex(TypeError,
391+
r"subn\(\) got multiple values for argument 'flags'"):
392+
re.subn('a', 'b', 'aaaaa', 1, 0, flags=0)
393+
with self.assertRaisesRegex(TypeError,
394+
r"subn\(\) takes from 3 to 5 positional arguments but 6 "
395+
r"were given"):
396+
re.subn('a', 'b', 'aaaaa', 1, 0, 0)
397+
350398
def test_re_split(self):
351399
for string in ":a:b::c", S(":a:b::c"):
352400
self.assertTypedEqual(re.split(":", string),
@@ -401,7 +449,9 @@ def test_re_split(self):
401449
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
402450

403451
def test_qualified_re_split(self):
404-
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
452+
with self.assertWarns(DeprecationWarning) as w:
453+
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
454+
self.assertEqual(w.filename, __file__)
405455
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
406456
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
407457
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
@@ -411,6 +461,17 @@ def test_qualified_re_split(self):
411461
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
412462
['', ':', '', '', 'a:b::c'])
413463

464+
with self.assertRaisesRegex(TypeError,
465+
r"split\(\) got multiple values for argument 'maxsplit'"):
466+
re.split(":", ":a:b::c", 2, maxsplit=2)
467+
with self.assertRaisesRegex(TypeError,
468+
r"split\(\) got multiple values for argument 'flags'"):
469+
re.split(":", ":a:b::c", 2, 0, flags=0)
470+
with self.assertRaisesRegex(TypeError,
471+
r"split\(\) takes from 2 to 4 positional arguments but 5 "
472+
r"were given"):
473+
re.split(":", ":a:b::c", 2, 0, 0)
474+
414475
def test_re_findall(self):
415476
self.assertEqual(re.findall(":+", "abc"), [])
416477
for string in "a:b::c:::d", S("a:b::c:::d"):
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Deprecate passing optional arguments *maxsplit*, *count* and *flags* in
2+
module-level functions :func:`re.split`, :func:`re.sub` and :func:`re.subn` as positional arguments.
3+
They should only be passed by keyword.

0 commit comments

Comments
 (0)