Skip to content

ENH/API: Change flexible types to indicate resizability with elsize == -1 #8970

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions doc/release/upcoming_changes/8970.compatibility.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
``PyArray_Descr.elsize`` is now ``-1`` for unsized flexible dtypes
------------------------------------------------------------------
Previously it was ``0`` - but that made it impossible to distinguish unsized
types (``U``) from sized-to-0 types (``U0``). The existing C macro,
``PyDataType_ISUNSIZED(descr)``, should be used to check for this.

From the Python side, this manifests itself as::

    >>> np.dtype(str).itemsize  # None
    >>> np.dtype('U').itemsize  # None
    >>> np.dtype('U0').itemsize
    0
4 changes: 4 additions & 0 deletions doc/release/upcoming_changes/8970.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Empty flexible dtypes, such as ``S0``, ``U0``, and ``V0``, are now supported
----------------------------------------------------------------------------
Previously, these were equivalent to ``S``, ``U``, and ``V``, which described
a dtype that was not yet assigned a size.
2 changes: 1 addition & 1 deletion doc/source/reference/c-api/types-and-structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ PyArrayDescr_Type and PyArray_Descr
For data types that are always the same size (such as long), this
holds the size of the data type. For flexible data types where
different arrays can have a different elementsize, this should be
0.
-1.

.. c:member:: int alignment

Expand Down
2 changes: 1 addition & 1 deletion numpy/core/_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _unpack_field(dtype, offset, title=None):

def _isunsized(dtype):
# PyDataType_ISUNSIZED
return dtype.itemsize == 0
return dtype.itemsize is None


def _construction_repr(dtype, include_align=False, short=False):
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/include/numpy/ndarraytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1687,9 +1687,9 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
#define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num)
#define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL)
#define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL)
#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0 && \
#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == -1 && \
!PyDataType_HASFIELDS(dtype))
#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0)
#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = -1)

#define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj))
#define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj))
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/src/multiarray/arrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
(int)dims.len,
dims.ptr,
strides.ptr, NULL, is_f_order, NULL, NULL,
0, 1);
0);
if (ret == NULL) {
descr = NULL;
goto fail;
Expand Down Expand Up @@ -1695,7 +1695,7 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds)
subtype, descr,
dims.len, dims.ptr, strides.ptr, offset + (char *)buffer.ptr,
buffer.flags, NULL, buffer.base,
0, 1);
0);
if (ret == NULL) {
descr = NULL;
goto fail;
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/arraytypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -4127,7 +4127,7 @@ static PyArray_Descr @from@_Descr = {
/* type_num */
NPY_@from@,
/* elsize */
0,
-1,
/* alignment */
_ALIGN(@align@),
/* subarray */
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
PyArray_NDIM(self), PyArray_DIMS(self), PyArray_STRIDES(self),
PyArray_DATA(self),
flags, (PyObject *)self, (PyObject *)self,
0, 1);
0);
if (ret == NULL) {
Py_XDECREF(type);
return NULL;
Expand Down
39 changes: 24 additions & 15 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -661,8 +661,7 @@ NPY_NO_EXPORT PyObject *
PyArray_NewFromDescr_int(
PyTypeObject *subtype, PyArray_Descr *descr, int nd,
npy_intp const *dims, npy_intp const *strides, void *data,
int flags, PyObject *obj, PyObject *base, int zeroed,
int allow_emptystring)
int flags, PyObject *obj, PyObject *base, int zeroed)
{
PyArrayObject_fields *fa;
int i;
Expand All @@ -682,8 +681,7 @@ PyArray_NewFromDescr_int(
ret = PyArray_NewFromDescr_int(
subtype, descr,
nd, newdims, newstrides, data,
flags, obj, base,
zeroed, allow_emptystring);
flags, obj, base, zeroed);
return ret;
}

Expand All @@ -696,27 +694,31 @@ PyArray_NewFromDescr_int(
}

/* Check datatype element size */
nbytes = descr->elsize;
if (PyDataType_ISUNSIZED(descr)) {
if (!PyDataType_ISFLEXIBLE(descr)) {
PyErr_SetString(PyExc_TypeError, "Empty data-type");
Py_DECREF(descr);
return NULL;
}
else if (PyDataType_ISSTRING(descr) && !allow_emptystring &&
data == NULL) {
else {
PyArray_DESCR_REPLACE(descr);
if (descr == NULL) {
return NULL;
}
if (descr->type_num == NPY_STRING) {
nbytes = descr->elsize = 1;
}
else {
nbytes = descr->elsize = sizeof(npy_ucs4);
switch (descr->type_num) {
case NPY_STRING:
descr->elsize = 1;
break;
case NPY_UNICODE:
descr->elsize = sizeof(npy_ucs4);
break;
case NPY_VOID:
default:
descr->elsize = 0;
}
}
}
nbytes = descr->elsize;

/* Check dimensions and multiply them to nbytes */
for (i = 0; i < nd; i++) {
Expand Down Expand Up @@ -938,7 +940,7 @@ PyArray_NewFromDescrAndBase(
{
return PyArray_NewFromDescr_int(subtype, descr, nd,
dims, strides, data,
flags, obj, base, 0, 0);
flags, obj, base, 0);
}

/*
Expand Down Expand Up @@ -2774,7 +2776,7 @@ PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order)
&PyArray_Type, type,
nd, dims, NULL, NULL,
is_f_order, NULL, NULL,
1, 0);
1);

if (ret == NULL) {
return NULL;
Expand Down Expand Up @@ -3413,13 +3415,20 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep)
Py_DECREF(dtype);
return NULL;
}
if (PyDataType_ISUNSIZED(dtype)) {
PyErr_SetString(PyExc_ValueError,
"Flexible dtypes must have an explicit size");
Py_DECREF(dtype);
return NULL;
}

if (dtype->elsize == 0) {
/* Nothing to read, just create an empty array of the requested type */
return PyArray_NewFromDescr_int(
&PyArray_Type, dtype,
1, &num, NULL, NULL,
0, NULL, NULL,
0, 1);
0);
}
if ((sep == NULL) || (strlen(sep) == 0)) {
ret = array_fromfile_binary(fp, dtype, num, &nread);
Expand Down
3 changes: 1 addition & 2 deletions numpy/core/src/multiarray/ctors.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ NPY_NO_EXPORT PyObject *
PyArray_NewFromDescr_int(
PyTypeObject *subtype, PyArray_Descr *descr, int nd,
npy_intp const *dims, npy_intp const *strides, void *data,
int flags, PyObject *obj, PyObject *base, int zeroed,
int allow_emptystring);
int flags, PyObject *obj, PyObject *base, int zeroed);

NPY_NO_EXPORT PyObject *
PyArray_NewLikeArrayWithShape(
Expand Down
27 changes: 23 additions & 4 deletions numpy/core/src/multiarray/descriptor.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,11 @@ _convert_from_array_descr(PyObject *obj, int align)
"Field elements must be tuples with at most 3 elements, got '%R'", item);
goto fail;
}
if (PyDataType_ISUNSIZED(conv)) {
PyErr_SetString(PyExc_ValueError,
"Flexible dtypes within compound dtypes must have a size");
goto fail;
}
if ((PyDict_GetItemWithError(fields, name) != NULL)
|| (title
&& PyUnicode_Check(title)
Expand Down Expand Up @@ -652,6 +657,11 @@ _convert_from_list(PyObject *obj, int align)
if (conv == NULL) {
goto fail;
}
if (PyDataType_ISUNSIZED(conv)) {
PyErr_SetString(PyExc_ValueError,
"Flexible dtypes within compound dtypes must have a size");
goto fail;
}
dtypeflags |= (conv->flags & NPY_FROM_FIELDS);
if (align) {
int _align = conv->alignment;
Expand Down Expand Up @@ -1613,7 +1623,7 @@ _convert_from_str(PyObject *obj, int align)
}

int check_num = NPY_NOTYPE + 10;
int elsize = 0;
int elsize = -1;
/* A typecode like 'd' */
if (len == 1) {
/* Python byte string characters are unsigned */
Expand Down Expand Up @@ -1652,7 +1662,7 @@ _convert_from_str(PyObject *obj, int align)
break;

default:
if (elsize == 0) {
if (elsize == -1) {
check_num = NPY_NOTYPE+10;
}
/* Support for generic processing c8, i4, f8, etc...*/
Expand Down Expand Up @@ -1850,8 +1860,6 @@ static PyMemberDef arraydescr_members[] = {
T_INT, offsetof(PyArray_Descr, type_num), READONLY, NULL},
{"byteorder",
T_CHAR, offsetof(PyArray_Descr, byteorder), READONLY, NULL},
{"itemsize",
T_INT, offsetof(PyArray_Descr, elsize), READONLY, NULL},
{"alignment",
T_INT, offsetof(PyArray_Descr, alignment), READONLY, NULL},
{"flags",
Expand Down Expand Up @@ -1969,6 +1977,14 @@ arraydescr_ndim_get(PyArray_Descr *self)
return PyLong_FromLong(ndim);
}

/*
 * Getter backing the ``itemsize`` attribute of a dtype object.
 *
 * Returns None when the descriptor is unsized according to
 * PyDataType_ISUNSIZED; otherwise returns a new Python integer holding
 * ``self->elsize``.
 */
static PyObject *
arraydescr_itemsize_get(PyArray_Descr *self)
{
    if (PyDataType_ISUNSIZED(self)) {
        Py_RETURN_NONE;
    }
    /* PyLong_FromLong, as arraydescr_ndim_get does, rather than PyInt_FromLong */
    return PyLong_FromLong(self->elsize);
}

NPY_NO_EXPORT PyObject *
arraydescr_protocol_descr_get(PyArray_Descr *self)
Expand Down Expand Up @@ -2275,6 +2291,9 @@ static PyGetSetDef arraydescr_getsets[] = {
{"hasobject",
(getter)arraydescr_hasobject_get,
NULL, NULL, NULL},
{"itemsize",
(getter)arraydescr_itemsize_get,
NULL, NULL, NULL},
{NULL, NULL, NULL, NULL, NULL},
};

Expand Down
6 changes: 3 additions & 3 deletions numpy/core/src/multiarray/dtype_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ wrap_copy_swap_function(int aligned,
&PyArray_Type, dtype,
1, &shape, NULL, NULL,
0, NULL, NULL,
0, 1);
0);
if (data->arr == NULL) {
PyArray_free(data);
return NPY_FAIL;
Expand Down Expand Up @@ -1412,7 +1412,7 @@ get_legacy_dtype_cast_function(
&PyArray_Type, tmp_dtype,
1, &shape, NULL, NULL,
0, NULL, NULL,
0, 1);
0);
if (data->aip == NULL) {
PyArray_free(data);
return NPY_FAIL;
Expand All @@ -1439,7 +1439,7 @@ get_legacy_dtype_cast_function(
&PyArray_Type, tmp_dtype,
1, &shape, NULL, NULL,
0, NULL, NULL,
0, 1);
0);
if (data->aop == NULL) {
Py_DECREF(data->aip);
PyArray_free(data);
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/einsum.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ get_single_op_view(PyArrayObject *op, char *labels,
ndim_output, new_dims, new_strides, PyArray_DATA(op),
PyArray_ISWRITEABLE(op) ? NPY_ARRAY_WRITEABLE : 0,
(PyObject *)op, (PyObject *)op,
0, 0);
0);

if (*ret == NULL) {
return -1;
Expand Down
31 changes: 21 additions & 10 deletions numpy/core/src/multiarray/getset.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,20 +493,31 @@ array_descr_set(PyArrayObject *self, PyObject *arg)
Py_DECREF(safe);
}

/*
* Viewing as an unsized void implies a void dtype matching the size of the
* current dtype.
*/
if (newtype->type_num == NPY_VOID &&
PyDataType_ISUNSIZED(newtype) &&
newtype->elsize != PyArray_DESCR(self)->elsize) {
PyArray_DESCR_REPLACE(newtype);
if (newtype == NULL) {
if (PyDataType_ISUNSIZED(newtype)) {
/*
* Viewing as an unsized void implies a void dtype matching the size of the
* current dtype.
*
* Viewing a type as an unsized version of itself is also fine.
*/
if (newtype->type_num == NPY_VOID ||
newtype->type_num == PyArray_DESCR(self)->type_num) {
PyArray_DESCR_REPLACE(newtype);
if (newtype == NULL) {
return -1;
}
newtype->elsize = PyArray_DESCR(self)->elsize;
}
/* But no other flexible types */
else {
PyErr_SetString(PyExc_ValueError,
"Flexible types must have explicit size");
Py_DECREF(newtype);
return -1;
}
newtype->elsize = PyArray_DESCR(self)->elsize;
}


/* Changing the size of the dtype results in a shape change */
if (newtype->elsize != PyArray_DESCR(self)->elsize) {
int axis;
Expand Down
4 changes: 2 additions & 2 deletions numpy/core/src/multiarray/mapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -1436,7 +1436,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_BYTES(arr) + offset,
PyArray_FLAGS(arr),
(PyObject *)arr, (PyObject *)arr,
0, 1);
0);
if (*view == NULL) {
return 0;
}
Expand Down Expand Up @@ -1490,7 +1490,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_DATA(arr),
PyArray_FLAGS(arr),
(PyObject *)arr, (PyObject *)arr,
0, 1);
0);

if (*view == NULL) {
return 0;
Expand Down
6 changes: 5 additions & 1 deletion numpy/core/src/multiarray/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ PyArray_GetField(PyArrayObject *self, PyArray_Descr *typed, int offset)
PyArray_BYTES(self) + offset,
PyArray_FLAGS(self) & ~NPY_ARRAY_F_CONTIGUOUS,
(PyObject *)self, (PyObject *)self,
0, 1);
0);
return ret;
}

Expand Down Expand Up @@ -1932,6 +1932,10 @@ array_setstate(PyArrayObject *self, PyObject *args)
if (nd < 0) {
return NULL;
}
if (PyDataType_ISUNSIZED(PyArray_DESCR(self))) {
PyErr_SetString(PyExc_ValueError, "Missing data-type size.");
return NULL;
}
size = PyArray_MultiplyList(dimensions, nd);
if (size < 0) {
/* More items than are addressable */
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2027,7 +2027,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
}
else {
if (obj == NULL) {
if (typecode->elsize == 0) {
if (PyDataType_ISUNSIZED(typecode)) {
typecode->elsize = 1;
}
dptr = PyArray_malloc(typecode->elsize);
Expand Down
Loading