Skip to content

bpo-36346: array: Don't use deprecated APIs #19653

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions Doc/library/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ defined:
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'B'`` | unsigned char | int | 1 | |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
| ``'u'`` | wchar_t | Unicode character | 2 | \(1) |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'h'`` | signed short | int | 2 | |
+-----------+--------------------+-------------------+-----------------------+-------+
Expand All @@ -48,15 +48,16 @@ defined:
Notes:

(1)
The ``'u'`` type code corresponds to Python's obsolete unicode character
(:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
platform, it can be 16 bits or 32 bits.
It can be 16 bits or 32 bits depending on the platform.

``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE`
API.
.. versionchanged:: 3.9
``array('u')`` now uses ``wchar_t`` as its C type instead of the deprecated
``Py_UNICODE``. This change doesn't affect its behavior because
``Py_UNICODE`` has been an alias of ``wchar_t`` since Python 3.3.

.. deprecated-removed:: 3.3 4.0


The actual representation of values is determined by the machine architecture
(strictly speaking, by the C implementation). The actual size can be accessed
through the :attr:`itemsize` attribute.
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.9.rst
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,12 @@ Changes in the Python API
``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``.
(Contributed by Batuhan Taskaya in :issue:`39562`)

* ``array('u')`` now uses ``wchar_t`` as its C type instead of ``Py_UNICODE``.
This change doesn't affect its behavior because ``Py_UNICODE`` has been an
alias of ``wchar_t`` since Python 3.3.
(Contributed by Inada Naoki in :issue:`34538`.)


CPython bytecode changes
------------------------

Expand Down
82 changes: 40 additions & 42 deletions Modules/arraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,24 +235,31 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
static PyObject *
u_getitem(arrayobject *ap, Py_ssize_t i)
{
return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]);
return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]);
}

static int
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
{
Py_UNICODE *p;
Py_ssize_t len;

if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
PyObject *u;
if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) {
return -1;
if (len != 1) {
}

Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0);
if (len != 2) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Windows with 16-bit wchar_t, characters larger than U+FFFF take two wchar_t, no? (A surrogate pair.) Maybe change the error message if this case happens?

PyErr_SetString(PyExc_TypeError,
"array item must be unicode character");
return -1;
}
if (i >= 0)
((Py_UNICODE *)ap->ob_item)[i] = p[0];

wchar_t w;
len = PyUnicode_AsWideChar(u, &w, 1);
assert(len == 1);

if (i >= 0) {
((wchar_t *)ap->ob_item)[i] = w;
}
return 0;
}

Expand Down Expand Up @@ -530,7 +537,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)

DEFINE_COMPAREITEMS(b, signed char)
DEFINE_COMPAREITEMS(BB, unsigned char)
DEFINE_COMPAREITEMS(u, Py_UNICODE)
DEFINE_COMPAREITEMS(u, wchar_t)
DEFINE_COMPAREITEMS(h, short)
DEFINE_COMPAREITEMS(HH, unsigned short)
DEFINE_COMPAREITEMS(i, int)
Expand All @@ -548,7 +555,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
static const struct arraydescr descriptors[] = {
{'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
{'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
{'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
{'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
{'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
{'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
{'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
Expand Down Expand Up @@ -1660,7 +1667,7 @@ array_array_tobytes_impl(arrayobject *self)
/*[clinic input]
array.array.fromunicode

ustr: Py_UNICODE(zeroes=True)
ustr: unicode
/

Extends this array with data from the unicode string ustr.
Expand All @@ -1671,25 +1678,28 @@ some other type.
[clinic start generated code]*/

static PyObject *
array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr,
Py_ssize_clean_t ustr_length)
/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/
array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
{
char typecode;

typecode = self->ob_descr->typecode;
if (typecode != 'u') {
if (self->ob_descr->typecode != 'u') {
PyErr_SetString(PyExc_ValueError,
"fromunicode() may only be called on "
"unicode type arrays");
return NULL;
}
if (ustr_length > 0) {

Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
assert(ustr_length > 0);
if (ustr_length > 1) {
ustr_length--; /* trim trailing NUL character */
Py_ssize_t old_size = Py_SIZE(self);
if (array_resize(self, old_size + ustr_length) == -1)
if (array_resize(self, old_size + ustr_length) == -1) {
return NULL;
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
ustr, ustr_length * sizeof(Py_UNICODE));
}

// must not fail
PyUnicode_AsWideChar(
ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
}

Py_RETURN_NONE;
Expand All @@ -1709,14 +1719,12 @@ static PyObject *
array_array_tounicode_impl(arrayobject *self)
/*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
{
char typecode;
typecode = self->ob_descr->typecode;
if (typecode != 'u') {
if (self->ob_descr->typecode != 'u') {
PyErr_SetString(PyExc_ValueError,
"tounicode() may only be called on unicode type arrays");
return NULL;
}
return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self));
return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
}

/*[clinic input]
Expand Down Expand Up @@ -2675,30 +2683,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Py_DECREF(v);
}
else if (initial != NULL && PyUnicode_Check(initial)) {
Py_UNICODE *ustr;
Py_ssize_t n;

ustr = PyUnicode_AsUnicode(initial);
wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
if (ustr == NULL) {
PyErr_NoMemory();
Py_DECREF(a);
return NULL;
}

n = PyUnicode_GET_DATA_SIZE(initial);
if (n > 0) {
arrayobject *self = (arrayobject *)a;
char *item = self->ob_item;
item = (char *)PyMem_Realloc(item, n);
if (item == NULL) {
PyErr_NoMemory();
Py_DECREF(a);
return NULL;
}
self->ob_item = item;
Py_SET_SIZE(self, n / sizeof(Py_UNICODE));
memcpy(item, ustr, n);
self->allocated = Py_SIZE(self);
// self->ob_item may be NULL but it is safe.
PyMem_Free(self->ob_item);
self->ob_item = (char *)ustr;
Py_SET_SIZE(self, n);
self->allocated = n;
}
}
else if (initial != NULL && array_Check(initial) && len > 0) {
Expand Down
17 changes: 10 additions & 7 deletions Modules/clinic/arraymodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.