From 240e7e5e5104b91951b606be61da1a10613a4b01 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 22 Apr 2020 22:06:30 +0900 Subject: [PATCH 1/9] bpo-36346: array: Don't use deprecated APIs * Py_UNICODE -> wchar_t * Py_UNICODE -> unicode in Argument Clinic * PyUnicode_AsUnicode -> PyUnicode_AsWideCharString --- Modules/arraymodule.c | 65 +++++++++++++++------------------- Modules/clinic/arraymodule.c.h | 17 +++++---- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 4920ad7b82124c..54f2630453d600 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -235,13 +235,13 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) static PyObject * u_getitem(arrayobject *ap, Py_ssize_t i) { - return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]); + return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]); } static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { - Py_UNICODE *p; + wchar_t *p; Py_ssize_t len; if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) @@ -252,7 +252,7 @@ u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) return -1; } if (i >= 0) - ((Py_UNICODE *)ap->ob_item)[i] = p[0]; + ((wchar_t *)ap->ob_item)[i] = p[0]; return 0; } @@ -530,7 +530,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) DEFINE_COMPAREITEMS(b, signed char) DEFINE_COMPAREITEMS(BB, unsigned char) -DEFINE_COMPAREITEMS(u, Py_UNICODE) +DEFINE_COMPAREITEMS(u, wchar_t) DEFINE_COMPAREITEMS(h, short) DEFINE_COMPAREITEMS(HH, unsigned short) DEFINE_COMPAREITEMS(i, int) @@ -548,7 +548,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long) static const struct arraydescr descriptors[] = { {'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1}, {'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0}, - {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0}, + {'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0}, {'h', 
sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1}, {'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0}, {'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1}, @@ -1660,7 +1660,7 @@ array_array_tobytes_impl(arrayobject *self) /*[clinic input] array.array.fromunicode - ustr: Py_UNICODE(zeroes=True) + ustr: unicode / Extends this array with data from the unicode string ustr. @@ -1671,25 +1671,29 @@ some other type. [clinic start generated code]*/ static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length) -/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/ +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) +/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/ { - char typecode; - - typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; } - if (ustr_length > 0) { + + Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + if (ustr_length > 1) { + ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); - if (array_resize(self, old_size + ustr_length) == -1) + if (array_resize(self, old_size + ustr_length) == -1) { + return NULL; + } + + Py_ssize_t res = PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); + if (res < 0) { // must not happen return NULL; - memcpy(self->ob_item + old_size * sizeof(Py_UNICODE), - ustr, ustr_length * sizeof(Py_UNICODE)); + } } Py_RETURN_NONE; @@ -1709,14 +1713,12 @@ static PyObject * array_array_tounicode_impl(arrayobject *self) /*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/ { - char typecode; - typecode = self->ob_descr->typecode; - if (typecode != 'u') { + if (self->ob_descr->typecode != 'u') { 
PyErr_SetString(PyExc_ValueError, "tounicode() may only be called on unicode type arrays"); return NULL; } - return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self)); + return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self)); } /*[clinic input] @@ -2675,30 +2677,21 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) { - Py_UNICODE *ustr; Py_ssize_t n; - - ustr = PyUnicode_AsUnicode(initial); + wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n); if (ustr == NULL) { - PyErr_NoMemory(); Py_DECREF(a); return NULL; } - n = PyUnicode_GET_DATA_SIZE(initial); if (n > 0) { arrayobject *self = (arrayobject *)a; - char *item = self->ob_item; - item = (char *)PyMem_Realloc(item, n); - if (item == NULL) { - PyErr_NoMemory(); - Py_DECREF(a); - return NULL; + if (self->ob_item != NULL) { + PyMem_Free(self->ob_item); } - self->ob_item = item; - Py_SET_SIZE(self, n / sizeof(Py_UNICODE)); - memcpy(item, ustr, n); - self->allocated = Py_SIZE(self); + self->ob_item = (char *)ustr; + Py_SET_SIZE(self, n); + self->allocated = n; } } else if (initial != NULL && array_Check(initial) && len > 0) { diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h index e1f4b0397b9cb5..b9245ca91d5fa9 100644 --- a/Modules/clinic/arraymodule.c.h +++ b/Modules/clinic/arraymodule.c.h @@ -380,20 +380,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__, {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__}, static PyObject * -array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr, - Py_ssize_clean_t ustr_length); +array_array_fromunicode_impl(arrayobject *self, PyObject *ustr); static PyObject * array_array_fromunicode(arrayobject *self, PyObject *arg) { PyObject *return_value = NULL; - const Py_UNICODE *ustr; - Py_ssize_clean_t ustr_length; + PyObject *ustr; - if (!PyArg_Parse(arg, "u#:fromunicode", &ustr, 
&ustr_length)) { + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("fromunicode", "argument", "str", arg); goto exit; } - return_value = array_array_fromunicode_impl(self, ustr, ustr_length); + if (PyUnicode_READY(arg) == -1) { + goto exit; + } + ustr = arg; + return_value = array_array_fromunicode_impl(self, ustr); exit: return return_value; @@ -531,4 +534,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__, #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF \ {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__}, -/*[clinic end generated code: output=f649fc0bc9f6b13a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9f70748dd3bc532f input=a9049054013a1b77]*/ From 1d9569ae7acfffe726dcd94f6890c8165d750e9b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 23 Apr 2020 10:01:09 +0900 Subject: [PATCH 2/9] update doc --- Doc/library/array.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Doc/library/array.rst b/Doc/library/array.rst index c9a9b1dabb2a79..823d4aa755de64 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -22,7 +22,7 @@ defined: +-----------+--------------------+-------------------+-----------------------+-------+ | ``'B'`` | unsigned char | int | 1 | | +-----------+--------------------+-------------------+-----------------------+-------+ -| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) | +| ``'u'`` | wchar_t | Unicode character | 2 | \(1) | +-----------+--------------------+-------------------+-----------------------+-------+ | ``'h'`` | signed short | int | 2 | | +-----------+--------------------+-------------------+-----------------------+-------+ @@ -48,13 +48,10 @@ defined: Notes: (1) - The ``'u'`` type code corresponds to Python's obsolete unicode character + The ``'u'`` type code had corresponded to Python's obsolete unicode character (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). 
Depending on the platform, it can be 16 bits or 32 bits. - ``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE` - API. - .. deprecated-removed:: 3.3 4.0 The actual representation of values is determined by the machine architecture From c075693889b6338a5f011d1b9c448100fb2ea364 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 27 Apr 2020 17:54:16 +0900 Subject: [PATCH 3/9] Add what's new entry --- Doc/whatsnew/3.9.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 20ebe92865a146..55f8a655e3d538 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -832,6 +832,12 @@ Changes in the Python API inherit from it should have this method defined. (Contributed by Kyle Stanley in :issue:`34037`.) +* ``array('u')`` now uses ``wchar_t`` as its C type instead of ``Py_UNICODE``. + This change doesn't affect its behavior because ``Py_UNICODE`` has been an alias + of ``wchar_t`` since Python 3.3. Although ``array('u')`` is deprecated, + it may be alive after ``Py_UNICODE`` is removed. + (Contributed by Inada Naoki in :issue:`34538`.) + CPython bytecode changes ------------------------ From 3cf002800dff411a4a4eb8fab33e5759a0b08b31 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 13:16:01 +0900 Subject: [PATCH 4/9] Apply suggestions from code review Co-Authored-By: Victor Stinner --- Doc/whatsnew/3.9.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 55f8a655e3d538..72f54cf21251f7 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -834,8 +834,7 @@ Changes in the Python API * ``array('u')`` now uses ``wchar_t`` as its C type instead of ``Py_UNICODE``. This change doesn't affect its behavior because ``Py_UNICODE`` has been an alias - of ``wchar_t`` since Python 3.3. Although ``array('u')`` is deprecated, - it may be alive after ``Py_UNICODE`` is removed. + of ``wchar_t`` since Python 3.3. 
(Contributed by Inada Naoki in :issue:`34538`.) From fbc39251fff47ff3ebb9f38247f1937a79d3ace6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 14:38:13 +0900 Subject: [PATCH 5/9] Update doc --- Doc/library/array.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Doc/library/array.rst b/Doc/library/array.rst index 823d4aa755de64..78020738bf4f75 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -48,12 +48,16 @@ defined: Notes: (1) - The ``'u'`` type code had corresponded to Python's obsolete unicode character - (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the - platform, it can be 16 bits or 32 bits. + It can be 16 bits or 32 bits depending on the platform. + + .. versionchanged:: 3.9 + ``array('u')`` now uses ``wchar_t`` as its C type instead of the deprecated + ``Py_UNICODE``. This change doesn't affect its behavior because + ``Py_UNICODE`` has been an alias of ``wchar_t`` since Python 3.3. .. deprecated-removed:: 3.3 4.0 + The actual representation of values is determined by the machine architecture (strictly speaking, by the C implementation). The actual size can be accessed through the :attr:`itemsize` attribute. From bea1779557ae3fb1f72be6762a91c3bca7156f2c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 30 Apr 2020 15:16:43 +0900 Subject: [PATCH 6/9] Don't use deprecated "u#" format. 
--- Modules/arraymodule.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 54f2630453d600..959837f60ac6df 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -241,18 +241,25 @@ u_getitem(arrayobject *ap, Py_ssize_t i) static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) { - wchar_t *p; - Py_ssize_t len; - - if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len)) + PyObject *u; + if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) { return -1; - if (len != 1) { + } + + Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0); + if (len != 2) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } - if (i >= 0) - ((wchar_t *)ap->ob_item)[i] = p[0]; + + wchar_t w; + len = PyUnicode_AsWideChar(u, &w, 1); + assert(len == 1); + + if (i >= 0) { + ((wchar_t *)ap->ob_item)[i] = w; + } return 0; } From df0ea67ea1099c7b81bc577f80d077c4e6c4bd5d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 May 2020 15:17:54 +0900 Subject: [PATCH 7/9] more assert --- Modules/arraymodule.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 959837f60ac6df..44d642b55e5689 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1689,6 +1689,7 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) } Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0); + assert(ustr_length > 0); if (ustr_length > 1) { ustr_length--; /* trim trailing NUL character */ Py_ssize_t old_size = Py_SIZE(self); @@ -1698,9 +1699,7 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) Py_ssize_t res = PyUnicode_AsWideChar( ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); - if (res < 0) { // must not happen - return NULL; - } + assert(res == ustr_length); } Py_RETURN_NONE; From 8454e8cdec80ad85a79682c73fa7fad64084fc74 
Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 1 May 2020 15:19:37 +0900 Subject: [PATCH 8/9] remove redundant NULL check --- Modules/arraymodule.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 44d642b55e5689..d77eb1a6a28ebf 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -2692,9 +2692,8 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (n > 0) { arrayobject *self = (arrayobject *)a; - if (self->ob_item != NULL) { - PyMem_Free(self->ob_item); - } + // self->ob_item may be NULL but it is safe. + PyMem_Free(self->ob_item); self->ob_item = (char *)ustr; Py_SET_SIZE(self, n); self->allocated = n; From 4ddac33e97846aa6eb2aafdbce544c99f253ef9f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 2 May 2020 01:53:47 +0900 Subject: [PATCH 9/9] remove redundant assert --- Modules/arraymodule.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index d77eb1a6a28ebf..732703e481adcd 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -1697,9 +1697,9 @@ array_array_fromunicode_impl(arrayobject *self, PyObject *ustr) return NULL; } - Py_ssize_t res = PyUnicode_AsWideChar( - ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); - assert(res == ustr_length); + // must not fail + PyUnicode_AsWideChar( + ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length); } Py_RETURN_NONE;