Skip to content

gh-50333: Deprecate support of non-tuple sequences in PyArg_ParseTuple() #128374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
19 changes: 17 additions & 2 deletions Doc/c-api/arg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -357,11 +357,26 @@ Other objects

.. versionadded:: 3.3

``(items)`` (:class:`tuple`) [*matching-items*]
The object must be a Python sequence whose length is the number of format units
``(items)`` (sequence) [*matching-items*]
The object must be a Python sequence (except :class:`str`, :class:`bytes`
or :class:`bytearray`) whose length is the number of format units
in *items*. The C arguments must correspond to the individual format units in
*items*. Format units for sequences may be nested.

If *items* contains format units which store a :ref:`borrowed buffer
<c-arg-borrowed-buffer>` (``s``, ``s#``, ``z``, ``z#``, ``y``, or ``y#``)
or a :term:`borrowed reference` (``S``, ``Y``, ``U``, ``O``, or ``O!``),
the object must be a Python tuple.
The *converter* for the ``O&`` format unit in *items* must not store
a borrowed buffer or a borrowed reference.

.. versionchanged:: next
:class:`str` and :class:`bytearray` objects no longer accepted as a sequence.

.. deprecated:: next
Non-tuple sequences are deprecated if *items* contains format units
which store a borrowed buffer or a borrowed reference.

A few other characters have a meaning in a format string. These may not occur
inside nested parentheses. They are:

Expand Down
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1911,6 +1911,13 @@ Deprecated
:c:macro:`!isfinite` available from :file:`math.h`
since C99. (Contributed by Sergey B Kirpichev in :gh:`119613`.)

* Non-tuple sequences are deprecated as argument for the ``(items)``
format unit in :c:func:`PyArg_ParseTuple` and other
:ref:`argument parsing <arg-parsing>` functions if *items* contains
format units which store a :ref:`borrowed buffer <c-arg-borrowed-buffer>`
or a :term:`borrowed reference`.
(Contributed by Serhiy Storchaka in :gh:`50333`.)

* The previously undocumented function :c:func:`PySequence_In` is :term:`soft deprecated`.
Use :c:func:`PySequence_Contains` instead.
(Contributed by Yuki Kobayashi in :gh:`127896`.)
Expand Down
59 changes: 49 additions & 10 deletions Lib/test/test_capi/test_getargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@

NULL = None

class CustomError(Exception):
pass

class Index:
def __index__(self):
return 99
Expand Down Expand Up @@ -586,13 +589,13 @@ def test_tuple(self):
ret = getargs_tuple(1, (2, 3))
self.assertEqual(ret, (1,2,3))

# make sure invalid tuple arguments are handled correctly
class seq:
# make sure invalid sequence arguments are handled correctly
class TestSeq:
def __len__(self):
return 2
def __getitem__(self, n):
raise ValueError
self.assertRaises(TypeError, getargs_tuple, 1, seq())
raise CustomError
self.assertRaises(CustomError, getargs_tuple, 1, TestSeq())

class Keywords_TestCase(unittest.TestCase):
def test_kwargs(self):
Expand Down Expand Up @@ -1320,33 +1323,69 @@ def test_nonascii_keywords(self):
f"this function got an unexpected keyword argument '{name2}'"):
parse((), {name2: 1, name3: 2}, '|OO', [name, name3])

def test_nested_tuple(self):
def test_nested_sequence(self):
parse = _testcapi.parse_tuple_and_keywords

self.assertEqual(parse(((1, 2, 3),), {}, '(OOO)', ['a']), (1, 2, 3))
self.assertEqual(parse((1, (2, 3), 4), {}, 'O(OO)O', ['a', 'b', 'c']),
(1, 2, 3, 4))
parse(((1, 2, 3),), {}, '(iii)', ['a'])
parse(([1, 2, 3],), {}, '(iii)', ['a'])

with self.assertRaisesRegex(TypeError,
"argument 1 must be sequence of length 2, not 3"):
"argument 1 must be tuple of length 2, not 3"):
parse(((1, 2, 3),), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be sequence of length 2, not 1"):
"argument 1 must be tuple of length 2, not 1"):
parse(((1,),), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item sequence, not int"):
"argument 1 must be sequence of length 2, not 3"):
parse(([1, 2, 3],), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be sequence of length 2, not 1"):
parse(([1,],), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item tuple, not int"):
parse((1,), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item sequence, not bytes"):
"argument 1 must be 2-item tuple, not None$"):
parse((None,), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item tuple, not str"):
parse(('ab',), {}, '(CC)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item tuple, not bytes"):
parse((b'ab',), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item tuple, not bytearray"):
parse((bytearray(b'ab'),), {}, '(ii)', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be 2-item tuple, not dict"):
parse(({},), {}, '(ii)', ['a'])

with self.assertWarnsRegex(DeprecationWarning,
"argument must be 3-item tuple, not list"):
self.assertEqual(parse(([1, 2, 3],), {}, '(OOO)', ['a']), (1, 2, 3))
with self.assertWarnsRegex(DeprecationWarning,
"argument must be 2-item tuple, not list"):
with self.assertRaisesRegex(TypeError,
"argument 1 must be tuple of length 2, not 3"):
parse(([1, 2, 3],), {}, '(OO)', ['a'])
with self.assertWarnsRegex(DeprecationWarning,
"argument must be 2-item tuple, not list"):
with self.assertRaisesRegex(TypeError,
"argument 1 must be tuple of length 2, not 1"):
parse(([1,],), {}, '(OO)', ['a'])

for f in 'es', 'et', 'es#', 'et#':
with self.assertRaises(LookupError): # empty encoding ""
parse((('a',),), {}, '(' + f + ')', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be sequence of length 1, not 0"):
"argument 1 must be tuple of length 1, not 0"):
parse(((),), {}, '(' + f + ')', ['a'])
with self.assertRaisesRegex(TypeError,
"argument 1 must be sequence of length 1, not 0"):
parse(([],), {}, '(' + f + ')', ['a'])

@unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi')
def test_gh_119213(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Non-tuple sequences are deprecated as argument for the ``(items)`` format
unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing
<arg-parsing>` functions if *items* contains format units which store
a :ref:`borrowed buffer <c-arg-borrowed-buffer>` or
a :term:`borrowed reference`.
89 changes: 72 additions & 17 deletions Python/getargs.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
const char *format = *p_format;
int i;
Py_ssize_t len;
int istuple = PyTuple_Check(arg);
int mustbetuple = istuple;

for (;;) {
int c = *format++;
Expand All @@ -481,51 +483,104 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
}
else if (c == ':' || c == ';' || c == '\0')
break;
else if (level == 0 && Py_ISALPHA(c) && c != 'e')
n++;
else {
if (level == 0 && Py_ISALPHA(c)) {
n++;
}
if (c == 'e' && (*format == 's' || *format == 't')) {
format++;
continue;
}
if (!mustbetuple) {
switch (c) {
case 'y':
case 's':
case 'z':
if (*format != '*') {
mustbetuple = 1;
}
break;
case 'S':
case 'Y':
case 'U':
mustbetuple = 1;
break;
case 'O':
if (*format != '&') {
mustbetuple = 1;
}
break;
}
}
}
}

if (!PySequence_Check(arg) || PyBytes_Check(arg)) {
if (istuple) {
/* fallthrough */
}
else if (!PySequence_Check(arg) ||
PyUnicode_Check(arg) || PyBytes_Check(arg) || PyByteArray_Check(arg))
{
levels[0] = 0;
PyOS_snprintf(msgbuf, bufsize,
"must be %d-item sequence, not %.50s",
"must be %d-item tuple, not %.50s",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

snprintf will keep the string within bufsize; we should not need to use the sized specifier.

Suggested change
"must be %d-item tuple, not %.50s",
"must be %d-item tuple, not %s",

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for consistency with other formats. Also, if we will add more text after the type name, it is easier to not forget to truncate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worry; snprintf will truncate.

n,
arg == Py_None ? "None" : Py_TYPE(arg)->tp_name);
return msgbuf;
}
else {
if (mustbetuple) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 0,
"argument must be %d-item tuple, not %T", n, arg))
{
return msgbuf;
}
}
len = PySequence_Size(arg);
if (len != n) {
levels[0] = 0;
PyOS_snprintf(msgbuf, bufsize,
"must be %s of length %d, not %zd",
mustbetuple ? "tuple" : "sequence", n, len);
return msgbuf;
}
arg = PySequence_Tuple(arg);
if (arg == NULL) {
return msgbuf;
}
}

len = PySequence_Size(arg);
len = PyTuple_GET_SIZE(arg);
if (len != n) {
levels[0] = 0;
PyOS_snprintf(msgbuf, bufsize,
"must be sequence of length %d, not %zd",
"must be tuple of length %d, not %zd",
n, len);
if (!istuple) {
Py_DECREF(arg);
}
return msgbuf;
}

format = *p_format;
for (i = 0; i < n; i++) {
const char *msg;
PyObject *item;
item = PySequence_GetItem(arg, i);
if (item == NULL) {
PyErr_Clear();
levels[0] = i+1;
levels[1] = 0;
strncpy(msgbuf, "is not retrievable", bufsize);
return msgbuf;
}
PyObject *item = PyTuple_GET_ITEM(arg, i);
msg = convertitem(item, &format, p_va, flags, levels+1,
msgbuf, bufsize, freelist);
/* PySequence_GetItem calls tp->sq_item, which INCREFs */
Py_XDECREF(item);
if (msg != NULL) {
levels[0] = i+1;
if (!istuple) {
Py_DECREF(arg);
}
return msg;
}
}

*p_format = format;
if (!istuple) {
Py_DECREF(arg);
}
return NULL;
}

Expand Down
Loading