diff --git a/doc/release/upcoming_changes/8970.compatibility.rst b/doc/release/upcoming_changes/8970.compatibility.rst new file mode 100644 index 000000000000..3da9d1eb623b --- /dev/null +++ b/doc/release/upcoming_changes/8970.compatibility.rst @@ -0,0 +1,12 @@ +``PyArray_Descr.elsize`` is now ``-1`` for unsized flexible dtypes +------------------------------------------------------------------ +Previously it was ``0`` - but that made it impossible to distinguish unsized +types (``U``) from sized-to-0 types (``U0``). The existing C macro +``PyDataType_ISUNSIZED(descr)`` has been updated to check for this. + +From the Python side, this manifests itself as:: + + >>> np.dtype(str).itemsize # None + >>> np.dtype('U').itemsize # None + >>> np.dtype('U0').itemsize + 0 diff --git a/doc/release/upcoming_changes/8970.improvement.rst b/doc/release/upcoming_changes/8970.improvement.rst new file mode 100644 index 000000000000..2cb1962c66ee --- /dev/null +++ b/doc/release/upcoming_changes/8970.improvement.rst @@ -0,0 +1,4 @@ +Empty flexible dtypes, such as ``S0``, ``U0``, and ``V0``, are now supported +---------------------------------------------------------------------------- +Previously, these were equivalent to ``S``, ``U``, and ``V``, which each describe +a dtype that has not yet been assigned a size. diff --git a/doc/source/reference/c-api/types-and-structures.rst b/doc/source/reference/c-api/types-and-structures.rst index 8759af6a4638..fa8c1177d230 100644 --- a/doc/source/reference/c-api/types-and-structures.rst +++ b/doc/source/reference/c-api/types-and-structures.rst @@ -338,7 +338,7 @@ PyArrayDescr_Type and PyArray_Descr For data types that are always the same size (such as long), this holds the size of the data type. For flexible data types where different arrays can have a different elementsize, this should be - 0. + -1. .. 
c:member:: int alignment diff --git a/numpy/core/_dtype.py b/numpy/core/_dtype.py index 4249071ffe98..476f5003bc0e 100644 --- a/numpy/core/_dtype.py +++ b/numpy/core/_dtype.py @@ -62,7 +62,7 @@ def _unpack_field(dtype, offset, title=None): def _isunsized(dtype): # PyDataType_ISUNSIZED - return dtype.itemsize == 0 + return dtype.itemsize is None def _construction_repr(dtype, include_align=False, short=False): diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 6bf54938f2b2..2d2d7fd86b27 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1687,9 +1687,9 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags) #define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num) #define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL) #define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL) -#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0 && \ +#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == -1 && \ !PyDataType_HASFIELDS(dtype)) -#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0) +#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = -1) #define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj)) #define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj)) diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 5da1b5f2902e..9f065fda1f4e 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -1659,7 +1659,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) (int)dims.len, dims.ptr, strides.ptr, NULL, is_f_order, NULL, NULL, - 0, 1); + 0); if (ret == NULL) { descr = NULL; goto fail; @@ -1695,7 +1695,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) subtype, descr, dims.len, dims.ptr, strides.ptr, offset + (char *)buffer.ptr, buffer.flags, NULL, buffer.base, - 
0, 1); + 0); if (ret == NULL) { descr = NULL; goto fail; diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index ecaca72a1848..996cb6f0720a 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -4127,7 +4127,7 @@ static PyArray_Descr @from@_Descr = { /* type_num */ NPY_@from@, /* elsize */ - 0, + -1, /* alignment */ _ALIGN(@align@), /* subarray */ diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index 29a2bb0e8c5f..fe10d78415fb 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -584,7 +584,7 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype) PyArray_NDIM(self), PyArray_DIMS(self), PyArray_STRIDES(self), PyArray_DATA(self), flags, (PyObject *)self, (PyObject *)self, - 0, 1); + 0); if (ret == NULL) { Py_XDECREF(type); return NULL; diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 05e45fbf52c5..70fb7ed50911 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -661,8 +661,7 @@ NPY_NO_EXPORT PyObject * PyArray_NewFromDescr_int( PyTypeObject *subtype, PyArray_Descr *descr, int nd, npy_intp const *dims, npy_intp const *strides, void *data, - int flags, PyObject *obj, PyObject *base, int zeroed, - int allow_emptystring) + int flags, PyObject *obj, PyObject *base, int zeroed) { PyArrayObject_fields *fa; int i; @@ -682,8 +681,7 @@ PyArray_NewFromDescr_int( ret = PyArray_NewFromDescr_int( subtype, descr, nd, newdims, newstrides, data, - flags, obj, base, - zeroed, allow_emptystring); + flags, obj, base, zeroed); return ret; } @@ -696,27 +694,31 @@ PyArray_NewFromDescr_int( } /* Check datatype element size */ - nbytes = descr->elsize; if (PyDataType_ISUNSIZED(descr)) { if (!PyDataType_ISFLEXIBLE(descr)) { PyErr_SetString(PyExc_TypeError, "Empty data-type"); Py_DECREF(descr); return NULL; } - 
else if (PyDataType_ISSTRING(descr) && !allow_emptystring && - data == NULL) { + else { PyArray_DESCR_REPLACE(descr); if (descr == NULL) { return NULL; } - if (descr->type_num == NPY_STRING) { - nbytes = descr->elsize = 1; - } - else { - nbytes = descr->elsize = sizeof(npy_ucs4); + switch (descr->type_num) { + case NPY_STRING: + descr->elsize = 1; + break; + case NPY_UNICODE: + descr->elsize = sizeof(npy_ucs4); + break; + case NPY_VOID: + default: + descr->elsize = 0; } } } + nbytes = descr->elsize; /* Check dimensions and multiply them to nbytes */ for (i = 0; i < nd; i++) { @@ -938,7 +940,7 @@ PyArray_NewFromDescrAndBase( { return PyArray_NewFromDescr_int(subtype, descr, nd, dims, strides, data, - flags, obj, base, 0, 0); + flags, obj, base, 0); } /* @@ -2774,7 +2776,7 @@ PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order) &PyArray_Type, type, nd, dims, NULL, NULL, is_f_order, NULL, NULL, - 1, 0); + 1); if (ret == NULL) { return NULL; @@ -3413,13 +3415,20 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep) Py_DECREF(dtype); return NULL; } + if (PyDataType_ISUNSIZED(dtype)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes must have an explicit size"); + Py_DECREF(dtype); + return NULL; + } + if (dtype->elsize == 0) { /* Nothing to read, just create an empty array of the requested type */ return PyArray_NewFromDescr_int( &PyArray_Type, dtype, 1, &num, NULL, NULL, 0, NULL, NULL, - 0, 1); + 0); } if ((sep == NULL) || (strlen(sep) == 0)) { ret = array_fromfile_binary(fp, dtype, num, &nread); diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h index 8db1412c71c9..5481a07583b9 100644 --- a/numpy/core/src/multiarray/ctors.h +++ b/numpy/core/src/multiarray/ctors.h @@ -17,8 +17,7 @@ NPY_NO_EXPORT PyObject * PyArray_NewFromDescr_int( PyTypeObject *subtype, PyArray_Descr *descr, int nd, npy_intp const *dims, npy_intp const *strides, void *data, - int flags, PyObject *obj, PyObject 
*base, int zeroed, - int allow_emptystring); + int flags, PyObject *obj, PyObject *base, int zeroed); NPY_NO_EXPORT PyObject * PyArray_NewLikeArrayWithShape( diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 24a3507bc408..5f715d0b4899 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -510,6 +510,11 @@ _convert_from_array_descr(PyObject *obj, int align) "Field elements must be tuples with at most 3 elements, got '%R'", item); goto fail; } + if (PyDataType_ISUNSIZED(conv)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes within compound dtypes must have a size"); + goto fail; + } if ((PyDict_GetItemWithError(fields, name) != NULL) || (title && PyUnicode_Check(title) @@ -652,6 +657,11 @@ _convert_from_list(PyObject *obj, int align) if (conv == NULL) { goto fail; } + if (PyDataType_ISUNSIZED(conv)) { + PyErr_SetString(PyExc_ValueError, + "Flexible dtypes within compound dtypes must have a size"); + goto fail; + } dtypeflags |= (conv->flags & NPY_FROM_FIELDS); if (align) { int _align = conv->alignment; @@ -1613,7 +1623,7 @@ _convert_from_str(PyObject *obj, int align) } int check_num = NPY_NOTYPE + 10; - int elsize = 0; + int elsize = -1; /* A typecode like 'd' */ if (len == 1) { /* Python byte string characters are unsigned */ @@ -1652,7 +1662,7 @@ _convert_from_str(PyObject *obj, int align) break; default: - if (elsize == 0) { + if (elsize == -1) { check_num = NPY_NOTYPE+10; } /* Support for generic processing c8, i4, f8, etc...*/ @@ -1850,8 +1860,6 @@ static PyMemberDef arraydescr_members[] = { T_INT, offsetof(PyArray_Descr, type_num), READONLY, NULL}, {"byteorder", T_CHAR, offsetof(PyArray_Descr, byteorder), READONLY, NULL}, - {"itemsize", - T_INT, offsetof(PyArray_Descr, elsize), READONLY, NULL}, {"alignment", T_INT, offsetof(PyArray_Descr, alignment), READONLY, NULL}, {"flags", @@ -1969,6 +1977,14 @@ arraydescr_ndim_get(PyArray_Descr *self) return 
PyLong_FromLong(ndim); } +/* Getter for dtype.itemsize: None for unsized flexible dtypes, else elsize. */ +static PyObject * +arraydescr_itemsize_get(PyArray_Descr *self) +{ + if (PyDataType_ISUNSIZED(self)) { + Py_RETURN_NONE; + } + return PyLong_FromLong(self->elsize); +} NPY_NO_EXPORT PyObject * arraydescr_protocol_descr_get(PyArray_Descr *self) @@ -2275,6 +2292,9 @@ static PyGetSetDef arraydescr_getsets[] = { {"hasobject", (getter)arraydescr_hasobject_get, NULL, NULL, NULL}, + {"itemsize", + (getter)arraydescr_itemsize_get, + NULL, NULL, NULL}, {NULL, NULL, NULL, NULL, NULL}, }; diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index af4e6c22e601..066de0207eed 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -593,7 +593,7 @@ wrap_copy_swap_function(int aligned, &PyArray_Type, dtype, 1, &shape, NULL, NULL, 0, NULL, NULL, - 0, 1); + 0); if (data->arr == NULL) { PyArray_free(data); return NPY_FAIL; @@ -1412,7 +1412,7 @@ get_legacy_dtype_cast_function( &PyArray_Type, tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, NULL, - 0, 1); + 0); if (data->aip == NULL) { PyArray_free(data); return NPY_FAIL; @@ -1439,7 +1439,7 @@ get_legacy_dtype_cast_function( &PyArray_Type, tmp_dtype, 1, &shape, NULL, NULL, 0, NULL, NULL, - 0, 1); + 0); if (data->aop == NULL) { Py_DECREF(data->aip); PyArray_free(data); diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 6ad375f670a5..9bfb53993526 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -322,7 +322,7 @@ get_single_op_view(PyArrayObject *op, char *labels, ndim_output, new_dims, new_strides, PyArray_DATA(op), PyArray_ISWRITEABLE(op) ? 
NPY_ARRAY_WRITEABLE : 0, (PyObject *)op, (PyObject *)op, - 0, 0); + 0); if (*ret == NULL) { return -1; diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 3575d6fad54e..b6c2e7937c09 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -493,20 +493,31 @@ array_descr_set(PyArrayObject *self, PyObject *arg) Py_DECREF(safe); } - /* - * Viewing as an unsized void implies a void dtype matching the size of the - * current dtype. - */ - if (newtype->type_num == NPY_VOID && - PyDataType_ISUNSIZED(newtype) && - newtype->elsize != PyArray_DESCR(self)->elsize) { - PyArray_DESCR_REPLACE(newtype); - if (newtype == NULL) { + if (PyDataType_ISUNSIZED(newtype)) { + /* + * Viewing as an unsized void implies a void dtype matching the size of the + * current dtype. + * + * Viewing a type as an unsized version of itself is also fine. + */ + if (newtype->type_num == NPY_VOID || + newtype->type_num == PyArray_DESCR(self)->type_num) { + PyArray_DESCR_REPLACE(newtype); + if (newtype == NULL) { + return -1; + } + newtype->elsize = PyArray_DESCR(self)->elsize; + } + /* But no other flexible types */ + else { + PyErr_SetString(PyExc_ValueError, + "Flexible types must have explicit size"); + Py_DECREF(newtype); return -1; } - newtype->elsize = PyArray_DESCR(self)->elsize; } + /* Changing the size of the dtype results in a shape change */ if (newtype->elsize != PyArray_DESCR(self)->elsize) { int axis; diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index cb5c3823dccf..328fd56717a1 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1436,7 +1436,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view) PyArray_BYTES(arr) + offset, PyArray_FLAGS(arr), (PyObject *)arr, (PyObject *)arr, - 0, 1); + 0); if (*view == NULL) { return 0; } @@ -1490,7 +1490,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject 
**view) PyArray_DATA(arr), PyArray_FLAGS(arr), (PyObject *)arr, (PyObject *)arr, - 0, 1); + 0); if (*view == NULL) { return 0; diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index e4421b41b7df..e1852809767d 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -403,7 +403,7 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset) PyArray_BYTES(self) + offset, PyArray_FLAGS(self) & ~NPY_ARRAY_F_CONTIGUOUS, (PyObject *)self, (PyObject *)self, - 0, 1); + 0); return ret; } @@ -1932,6 +1932,10 @@ array_setstate(PyArrayObject *self, PyObject *args) if (nd < 0) { return NULL; } + if (PyDataType_ISUNSIZED(PyArray_DESCR(self))) { + PyErr_SetString(PyExc_ValueError, "Missing data-type size."); + return NULL; + } size = PyArray_MultiplyList(dimensions, nd); if (size < 0) { /* More items than are addressable */ diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 1aad70dc65bb..614a92725bfd 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2027,7 +2027,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) } else { if (obj == NULL) { - if (typecode->elsize == 0) { + if (PyDataType_ISUNSIZED(typecode)) { typecode->elsize = 1; } dptr = PyArray_malloc(typecode->elsize); diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 02c349759528..df7f8ab41c94 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -278,7 +278,7 @@ PyArray_Newshape(PyArrayObject *self, PyArray_Dims *newdims, Py_TYPE(self), PyArray_DESCR(self), ndim, dimensions, strides, PyArray_DATA(self), flags, (PyObject *)self, (PyObject *)self, - 0, 1); + 0); Py_DECREF(self); return (PyObject *)ret; } diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 24730f96974c..dd8beab308ff 100644 
--- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -81,7 +81,7 @@ def test_array_array(): # test array_struct interface a = np.array([(1, 4.0, 'Hello'), (2, 6.0, 'World')], - dtype=[('f0', int), ('f1', float), ('f2', str)]) + dtype=[('f0', int), ('f1', float), ('f2', str, 5)]) o = type("o", (object,), dict(__array_struct__=a.__array_struct__)) ## wasn't what I expected... is np.array(o) supposed to equal a ? diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index f725091c5067..e69376b36f6b 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -658,7 +658,7 @@ def test_datetime_string_conversion(self): assert_equal(dt_a, dt_b) # Datetime to string - assert_equal(str_a, dt_a.astype('S0')) + assert_equal(str_a, dt_a.astype('S')) str_b = np.empty_like(str_a) str_b[...] = dt_a assert_equal(str_a, str_b) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 6f8af175703c..c3a805246a1e 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1565,7 +1565,8 @@ def test_view(self): zs = self._zeros(10, dt) # viewing as itself should be allowed - assert_equal(zs.view(dt).dtype, np.dtype(dt)) + assert_equal(zs.view(dt).dtype, zs.dtype) + assert_equal(zs.view(zs.dtype).dtype, zs.dtype) # viewing as any non-empty type gives an empty result assert_equal(zs.view((dt, 1)).shape, (0,)) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index f5428f98cb54..05d69a7fa414 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -2606,9 +2606,9 @@ class TestCreationFuncs: def setup(self): dtypes = {np.dtype(tp) for tp in itertools.chain(*np.sctypes.values())} # void, bytes, str - variable_sized = {tp for tp in dtypes if tp.str.endswith('0')} + variable_sized = {tp for tp in dtypes if tp.itemsize is None} self.dtypes = sorted(dtypes - variable_sized | - 
{np.dtype(tp.str.replace("0", str(i))) + {np.dtype((tp, i)) for tp in variable_sized for i in range(1, 10)}, key=lambda dtype: dtype.str) self.orders = {'C': 'c_contiguous', 'F': 'f_contiguous'} diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index f28ad5ac9098..0629ab683e41 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -313,7 +313,7 @@ def test_zero_width_strings(self): assert_equal(rec['f0'], ['test', 'test', 'test']) assert_equal(rec['f1'], ['', '', '']) - dt = np.dtype([('f0', '|S4'), ('f1', '|S')]) + dt = np.dtype([('f0', '|S4'), ('f1', '|S0')]) rec = np.rec.fromarrays(cols, dtype=dt) assert_equal(rec.itemsize, 4) assert_equal(rec['f0'], [b'test', b'test', b'test']) diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 2e731d4faa4d..31bb91b8cc23 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1870,7 +1870,7 @@ def test_string_astype(self): s3 = b'other' a = np.array([[s1], [s2], [s3]]) assert_equal(a.dtype, np.dtype('S5')) - b = a.astype(np.dtype('S0')) + b = a.astype(np.dtype('S')) assert_equal(b.dtype, np.dtype('S5')) def test_ticket_1756(self):