From c9cd8f80275ef73900cf3a07550aee242e47be37 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 21 Apr 2017 10:37:18 +0100 Subject: [PATCH 1/4] API: Use elsize == -1 to indicate an unsized flexible dtype This allows empty strings to be unambiguously specified. Unsized strings continue to promote to single-character strings. --- .../upcoming_changes/8970.compatibility.rst | 12 +++++++ .../upcoming_changes/8970.improvement.rst | 4 +++ .../reference/c-api.types-and-structures.rst | 2 +- numpy/core/include/numpy/ndarraytypes.h | 4 +-- numpy/core/src/multiarray/arraytypes.c.src | 2 +- numpy/core/src/multiarray/ctors.c | 27 +++++++++++----- numpy/core/src/multiarray/descriptor.c | 4 +-- numpy/core/src/multiarray/getset.c | 31 +++++++++++++------ numpy/core/src/multiarray/methods.c | 4 +++ numpy/core/src/multiarray/multiarraymodule.c | 2 +- numpy/core/tests/test_datetime.py | 2 +- numpy/core/tests/test_multiarray.py | 3 +- numpy/core/tests/test_regression.py | 2 +- 13 files changed, 71 insertions(+), 28 deletions(-) create mode 100644 doc/release/upcoming_changes/8970.compatibility.rst create mode 100644 doc/release/upcoming_changes/8970.improvement.rst diff --git a/doc/release/upcoming_changes/8970.compatibility.rst b/doc/release/upcoming_changes/8970.compatibility.rst new file mode 100644 index 000000000000..3da9d1eb623b --- /dev/null +++ b/doc/release/upcoming_changes/8970.compatibility.rst @@ -0,0 +1,12 @@ +``PyArray_Descr.elsize`` is now ``-1`` for unsized flexible dtypes +------------------------------------------------------------------ +Previously it was ``0`` - but that made it impossible to distinguish unsized +types (``U``) from sized-to-0 types (``U0``). A new C macro, +``PyDataType_ISUNSIZED(descr)``, exists to check for this. 
+ +From the Python side, this manifests itself as:: + + >>> np.dtype(str).itemsize # None + >>> np.dtype('U').itemsize # None + >>> np.dtype('U0').itemsize + 0 diff --git a/doc/release/upcoming_changes/8970.improvement.rst b/doc/release/upcoming_changes/8970.improvement.rst new file mode 100644 index 000000000000..2cb1962c66ee --- /dev/null +++ b/doc/release/upcoming_changes/8970.improvement.rst @@ -0,0 +1,4 @@ +Empty flexible dtypes, such as ``S0``, ``U0``, and ``V0``, are now supported +---------------------------------------------------------------------------- +Previously, these were equivalent to ``S``, ``U``, and ``V``, which described +a dtype that was not yet assigned a size. diff --git a/doc/source/reference/c-api.types-and-structures.rst b/doc/source/reference/c-api.types-and-structures.rst index 255c348f9c8c..1c93345669e8 100644 --- a/doc/source/reference/c-api.types-and-structures.rst +++ b/doc/source/reference/c-api.types-and-structures.rst @@ -312,7 +312,7 @@ PyArrayDescr_Type For data types that are always the same size (such as long), this holds the size of the data type. For flexible data types where different arrays can have a different elementsize, this should be - 0. + -1. .. 
c:member:: int PyArray_Descr.alignment diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 8c5d855df7cf..60e21706c62d 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1676,8 +1676,8 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags) #define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num) #define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL) #define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL) -#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0) -#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0) +#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == -1) +#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = -1) #define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj)) #define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj)) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index dc89a2318090..470e94dd5c6c 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -4319,7 +4319,7 @@ static PyArray_Descr @from@_Descr = { /* type_num */ NPY_@from@, /* elsize */ - 0, + -1, /* alignment */ _ALIGN(@align@), /* subarray */ diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index d31a9cf74ab3..b668f81bda0d 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -933,27 +933,31 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, } /* Check datatype element size */ - nbytes = descr->elsize; if (PyDataType_ISUNSIZED(descr)) { if (!PyDataType_ISFLEXIBLE(descr)) { PyErr_SetString(PyExc_TypeError, "Empty data-type"); Py_DECREF(descr); return NULL; } - else if (PyDataType_ISSTRING(descr) && !allow_emptystring && - data == NULL) { + else { PyArray_DESCR_REPLACE(descr); if (descr 
== NULL) { return NULL; } - if (descr->type_num == NPY_STRING) { - nbytes = descr->elsize = 1; - } - else { - nbytes = descr->elsize = sizeof(npy_ucs4); + switch (descr->type_num) { + case NPY_STRING: + descr->elsize = 1; + break; + case NPY_UNICODE: + descr->elsize = sizeof(npy_ucs4); + break; + case NPY_VOID: + default: + descr->elsize = 0; } } } + nbytes = descr->elsize; /* Check dimensions and multiply them to nbytes */ is_empty = 0; @@ -3396,6 +3400,13 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep) Py_DECREF(dtype); return NULL; } + if (PyDataType_ISUNSIZED(dtype)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes must have an explicit size"); + Py_DECREF(dtype); + return NULL; + } + if (dtype->elsize == 0) { /* Nothing to read, just create an empty array of the requested type */ return PyArray_NewFromDescr_int(&PyArray_Type, diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index e3c49b31480c..a8be72e8ebad 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1362,7 +1362,7 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) { int check_num = NPY_NOTYPE + 10; PyObject *item; - int elsize = 0; + int elsize = -1; char endian = '='; *at = NULL; @@ -1535,7 +1535,7 @@ PyArray_DescrConverter(PyObject *obj, PyArray_Descr **at) break; default: - if (elsize == 0) { + if (elsize == -1) { check_num = NPY_NOTYPE+10; } /* Support for generic processing c8, i4, f8, etc...*/ diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index a436750409db..15a8dea90302 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -469,20 +469,31 @@ array_descr_set(PyArrayObject *self, PyObject *arg) Py_DECREF(safe); } - /* - * Viewing as an unsized void implies a void dtype matching the size of the - * current dtype. 
- */ - if (newtype->type_num == NPY_VOID && - PyDataType_ISUNSIZED(newtype) && - newtype->elsize != PyArray_DESCR(self)->elsize) { - PyArray_DESCR_REPLACE(newtype); - if (newtype == NULL) { + if (PyDataType_ISUNSIZED(newtype)) { + /* + * Viewing as an unsized void implies a void dtype matching the size of the + * current dtype. + * + * Viewing a type as an unsized version of itself is also fine. + */ + if (newtype->type_num == NPY_VOID || + newtype->type_num == PyArray_DESCR(self)->type_num) { + PyArray_DESCR_REPLACE(newtype); + if (newtype == NULL) { + return -1; + } + newtype->elsize = PyArray_DESCR(self)->elsize; + } + /* But no other flexible types */ + else { + PyErr_SetString(PyExc_ValueError, + "Flexible types must have explicit size"); + Py_DECREF(newtype); return -1; } - newtype->elsize = PyArray_DESCR(self)->elsize; } + /* Changing the size of the dtype results in a shape change */ if (newtype->elsize != PyArray_DESCR(self)->elsize) { int axis; diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 6a121574bfcc..3b5660bb45cb 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -1715,6 +1715,10 @@ array_setstate(PyArrayObject *self, PyObject *args) if (nd < 0) { return NULL; } + if (PyDataType_ISUNSIZED(PyArray_DESCR(self))) { + PyErr_SetString(PyExc_ValueError, "Missing data-type size."); + return NULL; + } size = PyArray_MultiplyList(dimensions, nd); if (size < 0) { /* More items than are addressable */ diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 499ec343c392..6d626a9496d4 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1964,7 +1964,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) } else { if (obj == NULL) { - if (typecode->elsize == 0) { + if (PyDataType_ISUNSIZED(typecode)) { typecode->elsize = 1; } dptr = 
PyArray_malloc(typecode->elsize); diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index dc84a039c9dd..9741aa6503b3 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -529,7 +529,7 @@ def test_datetime_string_conversion(self): assert_equal(dt_a, dt_b) # Datetime to string - assert_equal(str_a, dt_a.astype('S0')) + assert_equal(str_a, dt_a.astype('S')) str_b = np.empty_like(str_a) str_b[...] = dt_a assert_equal(str_a, str_b) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index f6a5b4983911..c7846b2d3cbf 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1288,7 +1288,8 @@ def test_view(self): zs = self._zeros(10, dt) # viewing as itself should be allowed - assert_equal(zs.view(dt).dtype, np.dtype(dt)) + assert_equal(zs.view(dt).dtype, zs.dtype) + assert_equal(zs.view(zs.dtype).dtype, zs.dtype) # viewing as any non-empty type gives an empty result assert_equal(zs.view((dt, 1)).shape, (0,)) diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index f791f6725806..570c7c1aa189 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1770,7 +1770,7 @@ def test_string_astype(self): s3 = b'other' a = np.array([[s1], [s2], [s3]]) assert_equal(a.dtype, np.dtype('S5')) - b = a.astype(np.dtype('S0')) + b = a.astype(np.dtype('S')) assert_equal(b.dtype, np.dtype('S5')) def test_ticket_1756(self): From 28858d52ad8d79163a3226672bcc5ca664fd8a8e Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 21 Apr 2017 10:52:57 +0100 Subject: [PATCH 2/4] ENH: Make .itemsize return none when .elsize == -1 --- numpy/core/src/multiarray/descriptor.c | 13 +++++++++++-- numpy/core/tests/test_numeric.py | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 
a8be72e8ebad..76fd30fbfb9c 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1792,8 +1792,6 @@ static PyMemberDef arraydescr_members[] = { T_INT, offsetof(PyArray_Descr, type_num), READONLY, NULL}, {"byteorder", T_CHAR, offsetof(PyArray_Descr, byteorder), READONLY, NULL}, - {"itemsize", - T_INT, offsetof(PyArray_Descr, elsize), READONLY, NULL}, {"alignment", T_INT, offsetof(PyArray_Descr, alignment), READONLY, NULL}, {"flags", @@ -1965,6 +1963,14 @@ arraydescr_ndim_get(PyArray_Descr *self) return PyInt_FromLong(1); } +static PyObject * +arraydescr_itemsize_get(PyArray_Descr *self) +{ + if (PyDataType_ISUNSIZED(self)) { + Py_RETURN_NONE; + } + return PyInt_FromLong(self->elsize); +} NPY_NO_EXPORT PyObject * arraydescr_protocol_descr_get(PyArray_Descr *self) @@ -2249,6 +2255,9 @@ static PyGetSetDef arraydescr_getsets[] = { {"hasobject", (getter)arraydescr_hasobject_get, NULL, NULL, NULL}, + {"itemsize", + (getter)arraydescr_itemsize_get, + NULL, NULL, NULL}, {NULL, NULL, NULL, NULL, NULL}, }; diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index aaf01999c4d7..85c792e69c31 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -2042,9 +2042,9 @@ class TestCreationFuncs(object): def setup(self): dtypes = {np.dtype(tp) for tp in itertools.chain(*np.sctypes.values())} # void, bytes, str - variable_sized = {tp for tp in dtypes if tp.str.endswith('0')} + variable_sized = {tp for tp in dtypes if tp.itemsize is None} self.dtypes = sorted(dtypes - variable_sized | - {np.dtype(tp.str.replace("0", str(i))) + {np.dtype((tp, i)) for tp in variable_sized for i in range(1, 10)}, key=lambda dtype: dtype.str) self.orders = {'C': 'c_contiguous', 'F': 'f_contiguous'} From 67f8fe74819676d825312a59e11784105e182066 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 23 Apr 2017 11:11:59 +0100 Subject: [PATCH 3/4] MAINT: Remove allow_emptystring from 
PyArray_NewFromDescr_int This argument is no longer used --- numpy/core/src/multiarray/arrayobject.c | 4 ++-- numpy/core/src/multiarray/convert.c | 2 +- numpy/core/src/multiarray/ctors.c | 12 +++++------- numpy/core/src/multiarray/ctors.h | 3 +-- numpy/core/src/multiarray/dtype_transfer.c | 6 +++--- numpy/core/src/multiarray/mapping.c | 4 ++-- numpy/core/src/multiarray/methods.c | 2 +- numpy/core/src/multiarray/shape.c | 2 +- 8 files changed, 16 insertions(+), 19 deletions(-) diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 1d4816d967c2..ed8c7ad80aec 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -1465,7 +1465,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) (int)dims.len, dims.ptr, strides.ptr, NULL, is_f_order, NULL, - 0, 1); + 0); if (ret == NULL) { descr = NULL; goto fail; @@ -1502,7 +1502,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) dims.len, dims.ptr, strides.ptr, offset + (char *)buffer.ptr, - buffer.flags, NULL, 0, 1); + buffer.flags, NULL, 0); if (ret == NULL) { descr = NULL; goto fail; diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index ca30d3f88c3a..3d7086effe3f 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -637,7 +637,7 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) PyArray_STRIDES(self), PyArray_DATA(self), flags, - (PyObject *)self, 0, 1); + (PyObject *)self, 0); if (ret == NULL) { Py_XDECREF(type); return NULL; diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index b668f81bda0d..eefb8a9feecb 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -899,8 +899,7 @@ discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, NPY_NO_EXPORT PyObject * PyArray_NewFromDescr_int(PyTypeObject *subtype, 
PyArray_Descr *descr, int nd, npy_intp *dims, npy_intp *strides, void *data, - int flags, PyObject *obj, int zeroed, - int allow_emptystring) + int flags, PyObject *obj, int zeroed) { PyArrayObject_fields *fa; int i, is_empty; @@ -919,8 +918,7 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, newstrides, nd); ret = PyArray_NewFromDescr_int(subtype, descr, nd, newdims, newstrides, - data, flags, obj, zeroed, - allow_emptystring); + data, flags, obj, zeroed); return ret; } @@ -1148,7 +1146,7 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, { return PyArray_NewFromDescr_int(subtype, descr, nd, dims, strides, data, - flags, obj, 0, 0); + flags, obj, 0); } /*NUMPY_API @@ -2851,7 +2849,7 @@ PyArray_Zeros(int nd, npy_intp *dims, PyArray_Descr *type, int is_f_order) type, nd, dims, NULL, NULL, - is_f_order, NULL, 1, 0); + is_f_order, NULL, 1); if (ret == NULL) { return NULL; @@ -3413,7 +3411,7 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep) dtype, 1, &num, NULL, NULL, - 0, NULL, 0, 1); + 0, NULL, 0); } if ((sep == NULL) || (strlen(sep) == 0)) { ret = array_fromfile_binary(fp, dtype, num, &nread); diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h index e889910cbef4..01cc6c3d948a 100644 --- a/numpy/core/src/multiarray/ctors.h +++ b/numpy/core/src/multiarray/ctors.h @@ -9,8 +9,7 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, NPY_NO_EXPORT PyObject * PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, npy_intp *dims, npy_intp *strides, void *data, - int flags, PyObject *obj, int zeroed, - int allow_emptystring); + int flags, PyObject *obj, int zeroed); NPY_NO_EXPORT PyObject *PyArray_New(PyTypeObject *, int nd, npy_intp *, int, npy_intp *, void *, int, int, PyObject *); diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 9c27255aa52d..4740d3f7976d 
100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -592,7 +592,7 @@ wrap_copy_swap_function(int aligned, */ Py_INCREF(dtype); data->arr = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type, dtype, - 1, &shape, NULL, NULL, 0, NULL, 0, 1); + 1, &shape, NULL, NULL, 0, NULL, 0); if (data->arr == NULL) { PyArray_free(data); return NPY_FAIL; @@ -1448,7 +1448,7 @@ get_nbo_cast_transfer_function(int aligned, } } data->aip = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type, - tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0, 1); + tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0); if (data->aip == NULL) { PyArray_free(data); return NPY_FAIL; @@ -1472,7 +1472,7 @@ get_nbo_cast_transfer_function(int aligned, } } data->aop = (PyArrayObject *)PyArray_NewFromDescr_int(&PyArray_Type, - tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0, 1); + tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, 0); if (data->aop == NULL) { Py_DECREF(data->aip); PyArray_free(data); diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 1a92365c8d8e..7cc0f1a13824 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1428,7 +1428,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyArray_STRIDES(arr), PyArray_BYTES(arr) + offset, PyArray_FLAGS(arr), - (PyObject *)arr, 0, 1); + (PyObject *)arr, 0); if (*view == NULL) { return 0; } @@ -1559,7 +1559,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyArray_STRIDES(arr), PyArray_DATA(arr), PyArray_FLAGS(arr), - (PyObject *)arr, 0, 1); + (PyObject *)arr, 0); if (*view == NULL) { return 0; } diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 3b5660bb45cb..32c25c0673cf 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -388,7 +388,7 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, 
int offset) PyArray_STRIDES(self), PyArray_BYTES(self) + offset, PyArray_FLAGS(self)&(~NPY_ARRAY_F_CONTIGUOUS), - (PyObject *)self, 0, 1); + (PyObject *)self, 0); if (ret == NULL) { return NULL; } diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 40925d8b9ca0..d9748e381307 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -270,7 +270,7 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, ndim, dimensions, strides, PyArray_DATA(self), - flags, (PyObject *)self, 0, 1); + flags, (PyObject *)self, 0); if (ret == NULL) { goto fail; From 083bc1a2ad2631d1e3bf6ecc07f7e22c7c82c14e Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Fri, 21 Apr 2017 11:54:53 +0100 Subject: [PATCH 4/4] BUG: Disallow flexible dtypes in compound types Fixes #8969 --- numpy/core/src/multiarray/descriptor.c | 12 ++++++++++++ numpy/core/tests/test_api.py | 2 +- numpy/core/tests/test_records.py | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 76fd30fbfb9c..bea65050bf4b 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -510,6 +510,11 @@ _convert_from_array_descr(PyObject *obj, int align) if (ret == NPY_FAIL) { goto fail; } + if (PyDataType_ISUNSIZED(conv)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes within compound dtypes must have a size"); + goto fail; + } if ((PyDict_GetItem(fields, name) != NULL) || (title #if defined(NPY_PY3K) @@ -655,6 +660,13 @@ _convert_from_list(PyObject *obj, int align) Py_DECREF(key); goto fail; } + if (PyDataType_ISUNSIZED(conv)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes within compound dtypes must have a size"); + Py_DECREF(tup); + Py_DECREF(key); + goto fail; + } dtypeflags |= (conv->flags & NPY_FROM_FIELDS); PyTuple_SET_ITEM(tup, 0, (PyObject *)conv); if (align) { diff --git 
a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 10c2d5421556..2074f076b4bd 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -100,7 +100,7 @@ def test_array_array(): # test array_struct interface a = np.array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], - dtype=[('f0', int), ('f1', float), ('f2', str)]) + dtype=[('f0', int), ('f1', float), ('f2', str, 5)]) o = type("o", (object,), dict(__array_struct__=a.__array_struct__)) ## wasn't what I expected... is np.array(o) supposed to equal a ? diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index 27d35fa651a9..4eac390ca68b 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -286,7 +286,7 @@ def test_zero_width_strings(self): assert_equal(rec['f0'], ['test', 'test', 'test']) assert_equal(rec['f1'], ['', '', '']) - dt = np.dtype([('f0', '|S4'), ('f1', '|S')]) + dt = np.dtype([('f0', '|S4'), ('f1', '|S0')]) rec = np.rec.fromarrays(cols, dtype=dt) assert_equal(rec.itemsize, 4) assert_equal(rec['f0'], [b'test', b'test', b'test'])