From 6ad1ff65c05fc8ecd94ec537674d1f1f2638f3d8 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 31 Dec 2024 15:28:52 +0200 Subject: [PATCH 1/7] gh-50333: Deprecate support of non-tuple sequences in PyArg_ParseTuple() Non-tuple sequences are deprecated as argument for the "(items)" format unit in PyArg_ParseTuple() and other argument parsing functions if items contains format units which store borrowed buffer or reference (e.g. "s" and "O"). str and bytearray are no longer accepted as valid sequences. --- Doc/c-api/arg.rst | 12 +++ Doc/whatsnew/3.14.rst | 7 ++ Lib/test/test_capi/test_getargs.py | 57 ++++++++++--- ...4-12-31-15-28-14.gh-issue-50333.KxQUXa.rst | 5 ++ Python/getargs.c | 81 +++++++++++++++---- 5 files changed, 136 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 3201bdc82691f4..8b32dd9819a737 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -344,6 +344,18 @@ Other objects in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. + If *items* contains format units which store + :ref:`borrowed buffer ` or + :term:`reference ` + (``y``, ``y#``, ``s``, ``s#``, ``z``, ``z#``, ``S``, ``Y``, ``U``, + ``O`` or ``O!``) the object must be a Python tuple. + The *converter* for the ``O&`` format unit in *items* must not store + borrowed buffer or reference. + + .. deprecated:: next + Non-tuple sequences are deprecated if *items* contains format units + which store borrowed buffer or reference. + It is possible to pass "long" integers (integers whose value exceeds the platform's :c:macro:`LONG_MAX`) however no proper range checking is done --- the most significant bits are silently truncated when the receiving field is too diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 63fa21e17bc834..a9a005c4a4b7e6 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1227,6 +1227,13 @@ Deprecated :c:macro:`!isfinite` available from :file:`math.h` since C99. (Contributed by Sergey B Kirpichev in :gh:`119613`.) +* Non-tuple sequences are deprecated as argument for the ``(items)`` + format unit in :c:func:`PyArg_ParseTuple` and other + :ref:`argument parsing ` functions if *items* contains + format units which store :ref:`borrowed buffer ` or + :term:`reference `. + (Contributed by Serhiy Storchaka in :gh:`50333`.) + * The previously undocumented function :c:func:`PySequence_In` is :term:`soft deprecated`. Use :c:func:`PySequence_Contains` instead. (Contributed by Yuki Kobayashi in :gh:`127896`.) diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 703d228f92e713..6334e7a74c073c 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -64,6 +64,9 @@ NULL = None +class CustomError(Exception): + pass + class Index: def __index__(self): return 99 @@ -588,12 +591,12 @@ def test_tuple(self): self.assertEqual(ret, (1,2,3)) # make sure invalid tuple arguments are handled correctly - class seq: + class TestSeq: def __len__(self): return 2 def __getitem__(self, n): - raise ValueError - self.assertRaises(TypeError, getargs_tuple, 1, seq()) + raise CustomError + self.assertRaises(CustomError, getargs_tuple, 1, TestSeq()) class Keywords_TestCase(unittest.TestCase): def test_kwargs(self): @@ -1321,33 +1324,69 @@ def test_nonascii_keywords(self): f"this function got an unexpected keyword argument '{name2}'"): parse((), {name2: 1, name3: 2}, '|OO', [name, name3]) - def test_nested_tuple(self): + def test_nested_sequence(self): parse = _testcapi.parse_tuple_and_keywords self.assertEqual(parse(((1, 2, 3),), {}, '(OOO)', ['a']), (1, 2, 3)) self.assertEqual(parse((1, (2, 3), 4), {}, 'O(OO)O', ['a', 'b', 'c']), (1, 2, 3, 4)) parse(((1, 2, 3),), {}, '(iii)', ['a']) + parse(([1, 2, 3],), {}, '(iii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 2, not 3"): + "argument 1 must be tuple of length 2, not 3"): parse(((1, 2, 3),), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 2, not 1"): + "argument 1 must be tuple of length 2, not 1"): parse(((1,),), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be 2-item sequence, not int"): + "argument 1 must be sequence of length 2, not 3"): + parse(([1, 2, 3],), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be sequence of length 2, not 1"): + parse(([1,],), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not int"): parse((1,), {}, '(ii)', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be 2-item sequence, not bytes"): + "argument 1 must be 2-item tuple, not None$"): + parse((None,), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not str"): + parse(('ab',), {}, '(CC)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not bytes"): parse((b'ab',), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not bytearray"): + parse((bytearray(b'ab'),), {}, '(ii)', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be 2-item tuple, not dict"): + parse(({},), {}, '(ii)', ['a']) + + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 3-item tuple, not list"): + self.assertEqual(parse(([1, 2, 3],), {}, '(OOO)', ['a']), (1, 2, 3)) + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 2-item tuple, not list"): + with self.assertRaisesRegex(TypeError, + "argument 1 must be tuple of length 2, not 3"): + parse(([1, 2, 3],), {}, '(OO)', ['a']) + with self.assertWarnsRegex(DeprecationWarning, + "argument must be 2-item tuple, not list"): + with self.assertRaisesRegex(TypeError, + "argument 1 must be tuple of length 2, not 1"): + parse(([1,],), {}, '(OO)', ['a']) for f in 'es', 'et', 'es#', 'et#': with self.assertRaises(LookupError): # empty encoding "" parse((('a',),), {}, '(' + f + ')', ['a']) with self.assertRaisesRegex(TypeError, - "argument 1 must be sequence of length 1, not 0"): + "argument 1 must be tuple of length 1, not 0"): parse(((),), {}, '(' + f + ')', ['a']) + with self.assertRaisesRegex(TypeError, + "argument 1 must be sequence of length 1, not 0"): + parse(([],), {}, '(' + f + ')', ['a']) @unittest.skipIf(_testinternalcapi is None, 'needs _testinternalcapi') def test_gh_119213(self): diff --git a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst new file mode 100644 index 00000000000000..588dd6a222009c --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst @@ -0,0 +1,5 @@ +Non-tuple sequences are deprecated as argument for the ``(items)`` format +unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing +` functions if *items* contains format units which store +:ref:`borrowed buffer ` or :term:`reference `. diff --git a/Python/getargs.c b/Python/getargs.c index 6485a714a4e3fe..0865e335f90a62 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -465,6 +465,7 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; + int mustbetuple = PyTuple_Check(arg); for (;;) { int c = *format++; @@ -480,51 +481,97 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (c == ':' || c == ';' || c == '\0') break; - else if (level == 0 && Py_ISALPHA(c) && c != 'e') - n++; + else { + if (level == 0 && Py_ISALPHA(c)) + n++; + if (c == 'e' && (*format == 's' || *format == 't')) { + format++; + continue; + } + if (!mustbetuple) { + switch (c) { + case 'y': + case 's': + case 'z': + if (*format != '*') { + mustbetuple = 1; + } + break; + case 'S': + case 'Y': + case 'U': + mustbetuple = 1; + break; + case 'O': + if (*format != '&') { + mustbetuple = 1; + } + break; + } + } + } } - if (!PySequence_Check(arg) || PyBytes_Check(arg)) { + if (PyTuple_Check(arg)) { + Py_INCREF(arg); + } + else if (!PySequence_Check(arg) || + PyUnicode_Check(arg) || PyBytes_Check(arg) || PyByteArray_Check(arg)) + { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be %d-item sequence, not %.50s", + "must be %d-item tuple, not %.50s", n, arg == Py_None ? "None" : Py_TYPE(arg)->tp_name); return msgbuf; } + else { + if (mustbetuple) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 0, + "argument must be %d-item tuple, not %T", n, arg)) + { + return msgbuf; + } + } + len = PySequence_Size(arg); + if (len != n) { + levels[0] = 0; + PyOS_snprintf(msgbuf, bufsize, + "must be %s of length %d, not %zd", + mustbetuple ? "tuple" : "sequence", n, len); + return msgbuf; + } + arg = PySequence_Tuple(arg); + if (arg == NULL) { + return msgbuf; + } + } - len = PySequence_Size(arg); + len = PyTuple_GET_SIZE(arg); if (len != n) { levels[0] = 0; PyOS_snprintf(msgbuf, bufsize, - "must be sequence of length %d, not %zd", + "must be tuple of length %d, not %zd", n, len); + Py_DECREF(arg); return msgbuf; } format = *p_format; for (i = 0; i < n; i++) { const char *msg; - PyObject *item; - item = PySequence_GetItem(arg, i); - if (item == NULL) { - PyErr_Clear(); - levels[0] = i+1; - levels[1] = 0; - strncpy(msgbuf, "is not retrievable", bufsize); - return msgbuf; - } + PyObject *item = PyTuple_GET_ITEM(arg, i); msg = convertitem(item, &format, p_va, flags, levels+1, msgbuf, bufsize, freelist); - /* PySequence_GetItem calls tp->sq_item, which INCREFs */ - Py_XDECREF(item); if (msg != NULL) { levels[0] = i+1; + Py_DECREF(arg); return msg; } } *p_format = format; + Py_DECREF(arg); return NULL; } From 9878b37001a944fd2bd16e70268432c29c8cc972 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 Jan 2025 14:19:27 +0200 Subject: [PATCH 2/7] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Doc/c-api/arg.rst | 10 +++++----- Doc/whatsnew/3.14.rst | 4 ++-- Lib/test/test_capi/test_getargs.py | 2 +- .../2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 8b32dd9819a737..d1cdc7f07ea5e5 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -344,17 +344,17 @@ Other objects in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. - If *items* contains format units which store + If *items* contains format units which store a :ref:`borrowed buffer ` or - :term:`reference ` + a :term:`borrowed reference` (``y``, ``y#``, ``s``, ``s#``, ``z``, ``z#``, ``S``, ``Y``, ``U``, - ``O`` or ``O!``) the object must be a Python tuple. + ``O`` or ``O!``), the object must be a Python tuple. The *converter* for the ``O&`` format unit in *items* must not store - borrowed buffer or reference. + a borrowed buffer or reference. .. deprecated:: next Non-tuple sequences are deprecated if *items* contains format units - which store borrowed buffer or reference. + which store a borrowed buffer or reference. It is possible to pass "long" integers (integers whose value exceeds the platform's :c:macro:`LONG_MAX`) however no proper range checking is done --- the diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index a9a005c4a4b7e6..d6827c56cd42a5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1230,8 +1230,8 @@ Deprecated * Non-tuple sequences are deprecated as argument for the ``(items)`` format unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing ` functions if *items* contains - format units which store :ref:`borrowed buffer ` or - :term:`reference `. + format units which store a :ref:`borrowed buffer ` + or a :term:`borrowed reference`. (Contributed by Serhiy Storchaka in :gh:`50333`.) * The previously undocumented function :c:func:`PySequence_In` is :term:`soft deprecated`. diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 6334e7a74c073c..57d97f4a0f5853 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -590,7 +590,7 @@ def test_tuple(self): ret = getargs_tuple(1, (2, 3)) self.assertEqual(ret, (1,2,3)) - # make sure invalid tuple arguments are handled correctly + # make sure invalid sequence arguments are handled correctly class TestSeq: def __len__(self): return 2 diff --git a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst index 588dd6a222009c..64d231b1a1ea3f 100644 --- a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst +++ b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst @@ -1,5 +1,5 @@ Non-tuple sequences are deprecated as argument for the ``(items)`` format unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing ` functions if *items* contains format units which store -:ref:`borrowed buffer ` or :term:`reference `. +a :ref:`borrowed buffer ` or a :term:`borrowed +reference`. From e7198bc060670e9603148393c660bdd945607dd9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 Jan 2025 15:33:18 +0200 Subject: [PATCH 3/7] Duplicate "a borrowed". --- Doc/c-api/arg.rst | 8 ++++---- .../C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index d1cdc7f07ea5e5..3bf02ed3e21bac 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -344,17 +344,17 @@ Other objects in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. - If *items* contains format units which store a - :ref:`borrowed buffer ` or + If *items* contains format units which store + a :ref:`borrowed buffer ` or a :term:`borrowed reference` (``y``, ``y#``, ``s``, ``s#``, ``z``, ``z#``, ``S``, ``Y``, ``U``, ``O`` or ``O!``), the object must be a Python tuple. The *converter* for the ``O&`` format unit in *items* must not store - a borrowed buffer or reference. + a borrowed buffer or a borrowed reference. .. deprecated:: next Non-tuple sequences are deprecated if *items* contains format units - which store a borrowed buffer or reference. + which store a borrowed buffer or a borrowed reference. It is possible to pass "long" integers (integers whose value exceeds the platform's :c:macro:`LONG_MAX`) however no proper range checking is done --- the diff --git a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst index 64d231b1a1ea3f..5b761d1d1cf0fc 100644 --- a/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst +++ b/Misc/NEWS.d/next/C_API/2024-12-31-15-28-14.gh-issue-50333.KxQUXa.rst @@ -1,5 +1,5 @@ Non-tuple sequences are deprecated as argument for the ``(items)`` format unit in :c:func:`PyArg_ParseTuple` and other :ref:`argument parsing ` functions if *items* contains format units which store -a :ref:`borrowed buffer ` or a :term:`borrowed -reference`. +a :ref:`borrowed buffer ` or +a :term:`borrowed reference`. From fac19893beb22a0e96e90d6d453bd7d88b135807 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 Jan 2025 15:47:26 +0200 Subject: [PATCH 4/7] Re-order format units. --- Doc/c-api/arg.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 3bf02ed3e21bac..43617dcc088ba8 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -344,11 +344,10 @@ Other objects in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. - If *items* contains format units which store - a :ref:`borrowed buffer ` or - a :term:`borrowed reference` - (``y``, ``y#``, ``s``, ``s#``, ``z``, ``z#``, ``S``, ``Y``, ``U``, - ``O`` or ``O!``), the object must be a Python tuple. + If *items* contains format units which store a :ref:`borrowed buffer + ` (``s``, ``s#``, ``z``, ``z#``, ``y``, or ``y#``) + or a :term:`borrowed reference` (``S``, ``Y``, ``U``, ``O``, or ``O!``), + the object must be a Python tuple. The *converter* for the ``O&`` format unit in *items* must not store a borrowed buffer or a borrowed reference. From ab1ea18804d30fde72d2f4efa4a93731390a04ae Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 2 Jan 2025 16:20:00 +0200 Subject: [PATCH 5/7] Update Python/getargs.c Co-authored-by: Erlend E. Aasland --- Python/getargs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/getargs.c b/Python/getargs.c index 0865e335f90a62..2ea469a8ba916c 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -482,8 +482,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, else if (c == ':' || c == ';' || c == '\0') break; else { - if (level == 0 && Py_ISALPHA(c)) + if (level == 0 && Py_ISALPHA(c)) { n++; + } if (c == 'e' && (*format == 's' || *format == 't')) { format++; continue; From 44fd7000294b45fc575ba2250aac64e2383b7f9f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 3 Jan 2025 17:00:29 +0200 Subject: [PATCH 6/7] Update docs. --- Doc/c-api/arg.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 43617dcc088ba8..d284c6ef96e3ea 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -339,8 +339,9 @@ Other objects .. versionadded:: 3.3 -``(items)`` (:class:`tuple`) [*matching-items*] - The object must be a Python sequence whose length is the number of format units +``(items)`` (sequence) [*matching-items*] + The object must be a Python sequence (except :class:`str`, :class:`bytes` + or :class:`bytearray`) whose length is the number of format units in *items*. The C arguments must correspond to the individual format units in *items*. Format units for sequences may be nested. @@ -351,6 +352,9 @@ Other objects The *converter* for the ``O&`` format unit in *items* must not store a borrowed buffer or a borrowed reference. + .. versionchanged:: next + :class:`str` and :class:`bytearray` objects no longer accepted as a sequence. + .. deprecated:: next Non-tuple sequences are deprecated if *items* contains format units which store a borrowed buffer or a borrowed reference. From d700cadbaea6cda456fa9a960497571cee5fdd81 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 6 Apr 2025 00:10:29 +0300 Subject: [PATCH 7/7] Minimize refcount changes. --- Python/getargs.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Python/getargs.c b/Python/getargs.c index 38c045b3910d89..08325ca5a87c49 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -466,7 +466,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, const char *format = *p_format; int i; Py_ssize_t len; - int mustbetuple = PyTuple_Check(arg); + int istuple = PyTuple_Check(arg); + int mustbetuple = istuple; for (;;) { int c = *format++; @@ -514,8 +515,8 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } } - if (PyTuple_Check(arg)) { - Py_INCREF(arg); + if (istuple) { + /* fallthrough */ } else if (!PySequence_Check(arg) || PyUnicode_Check(arg) || PyBytes_Check(arg) || PyByteArray_Check(arg)) @@ -555,7 +556,9 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, PyOS_snprintf(msgbuf, bufsize, "must be tuple of length %d, not %zd", n, len); - Py_DECREF(arg); + if (!istuple) { + Py_DECREF(arg); + } return msgbuf; } @@ -567,13 +570,17 @@ converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags, msgbuf, bufsize, freelist); if (msg != NULL) { levels[0] = i+1; - Py_DECREF(arg); + if (!istuple) { + Py_DECREF(arg); + } return msg; } } *p_format = format; - Py_DECREF(arg); + if (!istuple) { + Py_DECREF(arg); + } return NULL; }