diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 66ccf3c96a65a4..d7b277e9eae03e 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -357,11 +357,26 @@ Other objects .. versionadded:: 3.3 -``(items)`` (:class:`tuple`) [*matching-items*] - The object must be a Python sequence whose length is the number of format units +``(items)`` (sequence) [*matching-items*] + The object must be a Python sequence (except :class:`str`, :class:`bytes` + or :class:`bytearray`) whose length is the number of format units in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. + If *items* contains format units which store a :ref:`borrowed buffer + ` (``s``, ``s#``, ``z``, ``z#``, ``y``, or ``y#``) + or a :term:`borrowed reference` (``S``, ``Y``, ``U``, ``O``, or ``O!``), + the object must be a Python tuple. + The *converter* for the ``O&`` format unit in *items* must not store + a borrowed buffer or a borrowed reference. + + .. versionchanged:: next + :class:`str` and :class:`bytearray` objects no longer accepted as a sequence. + + .. deprecated:: next + Non-tuple sequences are deprecated if *items* contains format units + which store a borrowed buffer or a borrowed reference. + A few other characters have a meaning in a format string. These may not occur inside nested parentheses. They are: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 13448d1fc07654..48a69b1d549463 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1911,6 +1911,13 @@ Deprecated :c:macro:`!isfinite` available from :file:`math.h` since C99. (Contributed by Sergey B Kirpichev in :gh:`119613`.) +* Non-tuple sequences are deprecated as argument for the ``(items)`` + format unit in :c:func:`PyArg_ParseTuple` and other + :ref:`argument parsing ` functions if *items* contains + format units which store a :ref:`borrowed buffer ` + or a :term:`borrowed reference`. + (Contributed by Serhiy Storchaka in :gh:`50333`.) + * The previously undocumented function :c:func:`PySequence_In` is :term:`soft deprecated`. Use :c:func:`PySequence_Contains` instead. (Contributed by Yuki Kobayashi in :gh:`127896`.) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 05d25a8d510bf7..60822d5d794a18 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -63,6 +63,9 @@ NULL = None +class CustomError(Exception): + pass + class Index: def __index__(self): return 99 @@ -586,13 +589,13 @@ def test_tuple(self): ret = getargs_tuple(1, (2, 3)) self.assertEqual(ret, (1,2,3)) - # make sure invalid tuple arguments are handled correctly - class seq: + # make sure invalid sequence arguments are handled correctly + class TestSeq: def __len__(self): return 2 def __getitem__(self, n): - raise ValueError - self.assertRaises(TypeError, getargs_tuple, 1, seq()) + raise CustomError + self.assertRaises(CustomError, getargs_tuple, 1, TestSeq()) class Keywords_TestCase(unittest.TestCase): def test_kwargs(self): @@ -1320,33 +1323,69 @@ def test_nonascii_keywords(self): f"this function got an unexpected keyword argument '{name2}'"): parse((), {name2: 1, name3: 2}, '|OO', [name, name3]) - def test_nested_tuple(self): + def test_nested_sequence(self): parse = _testcapi.parse_tuple_and_keywords self.assertEqual(parse(((1, 2, 3),), {}, '(OOO)', ['a']), (1, 2, 3)) self.assertEqual(parse((1, (2, 3), 4), {}, 'O(OO)O', ['a', 'b', 'c']), (1, 2, 3, 4)) parse(((1, 2, 3),), {}, '(iii)', ['a']) + parse(([1, 2, 3],), {}, '(iii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 2, not 3"): + "argument 1 must be tuple of length 2, not 3"): parse(((1, 2, 3),), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 2, not 1"): + "argument 1 must be tuple of length 2, not 1"): parse(((1,),), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be 2-item sequence, not int"): + "argument 1 must be sequence of length 2, not 3"): + parse(([1, 2, 3],), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be sequence of length 2, not 1"): + parse(([1,],), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not int"): parse((1,), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be 2-item sequence, not bytes"): + "argument 1 must be 2-item tuple, not None$"): + parse((None,), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not str"): + parse(('ab',), {}, '(CC)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not bytes"): parse((b'ab',), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not bytearray"): + parse((bytearray(b'ab'),), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not dict"): + parse(({},), {}, '(ii)', ['a']) + + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 3-item tuple, not list"): + self.assertEqual(parse(([1, 2, 3],), {}, '(OOO)', ['a']), (1, 2, 3)) + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 2-item tuple, not list"): + with self.assertRaisesRegex(TypeError, + "argument 1 must be tuple of length 2, not 3"): + parse(([1, 2, 3],), {}, '(OO)', ['a']) + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 2-item tuple, not list"): + with self.assertRaisesRegex(TypeError, + "argument 1 must be tuple of length 2, not 1"): + parse(([1,],), {}, '(OO)', ['a']) for f in 'es', 'et', 'es#', 'et#': with self.assertRaises(LookupError): # empty encoding "" parse((('a',),), {}, '(' + f + ')', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 1, not 0"): + "argument 1 must be tuple of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be sequence of length 1, not 0"): + parse(([],), {}, '(' + f + ')', ['a']) @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): diff --git a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst new file mode 100644 index 00000000000000..5b761d1d1cf0fc --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst @@ -0,0 +1,5 @@ +Non-tuple sequences are deprecated as argument for the ``(items)`` format +unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing +` functions if *items* contains format units which store +a :ref:`borrowed buffer ` or +a :term:`borrowed reference`. diff --git a/Python/getargs.c b/Python/getargs.c index 022a4115038902..08325ca5a87c49 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -466,6 +466,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; + int istuple = PyTuple_Check(arg); + int mustbetuple = istuple; for (;;) { int c = *format++; @@ -481,51 +483,104 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (c == ':' || c == ';' || c == '\0') break; - else if (level == 0 && Py_ISALPHA(c) && c != 'e') - n++; + else { + if (level == 0 && Py_ISALPHA(c)) { + n++; + } + if (c == 'e' && (*format == 's' || *format == 't')) { + format++; + continue; + } + if (!mustbetuple) { + switch (c) { + case 'y': + case 's': + case 'z': + if (*format != '*') { + mustbetuple = 1; + } + break; + case 'S': + case 'Y': + case 'U': + mustbetuple = 1; + break; + case 'O': + if (*format != '&') { + mustbetuple = 1; + } + break; + } + } + } } - if (!PySequence_Check(arg) || PyBytes_Check(arg)) { + if (istuple) { + /* fallthrough */ + } + else if (!PySequence_Check(arg) || + PyUnicode_Check(arg) || PyBytes_Check(arg) || PyByteArray_Check(arg)) + { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item sequence, not %.50s", + "must be %d-item tuple, not %.50s", n, arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } + else { + if (mustbetuple) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 0, + "argument must be %d-item tuple, not %T", n, arg)) + { + return msgbuf; + } + } + len = PySequence_Size(arg); + if (len != n) { + levels[0] = 0; + PyOS_snprintf(msgbuf, bufsize, + "must be %s of length %d, not %zd", + mustbetuple ? "tuple" : "sequence", n, len); + return msgbuf; + } + arg = PySequence_Tuple(arg); + if (arg == NULL) { + return msgbuf; + } + } - len = PySequence_Size(arg); + len = PyTuple_GET_SIZE(arg); if (len != n) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be sequence of length %d, not %zd", + "must be tuple of length %d, not %zd", n, len); + if (!istuple) { + Py_DECREF(arg); + } return msgbuf; } format = *p_format; for (i = 0; i < n; i++) { const char *msg; - PyObject *item; - item = PySequence_GetItem(arg, i); - if (item == NULL) { - PyErr_Clear(); - levels[0] = i+1; - levels[1] = 0; - strncpy(msgbuf, "is not retrievable", bufsize); - return msgbuf; - } + PyObject *item = PyTuple_GET_ITEM(arg, i); msg = convertitem(item, &format, p_va, flags, levels+1, msgbuf, bufsize, freelist); - /* PySequence_GetItem calls tp->sq_item, which INCREFs */ - Py_XDECREF(item); if (msg != NULL) { levels[0] = i+1; + if (!istuple) { + Py_DECREF(arg); + } return msg; } } *p_format = format; + if (!istuple) { + Py_DECREF(arg); + } return NULL; }