diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst index c89a282f40b2..2fc1add999ff 100644 --- a/doc/source/reference/arrays.dtypes.rst +++ b/doc/source/reference/arrays.dtypes.rst @@ -23,7 +23,7 @@ following aspects of the data: 2. what is the data-type of each :term:`field`, and 3. which part of the memory block each field takes. -5. If the data is a sub-array, what is its shape and data type. +5. If the data type is a sub-array, what is its shape and data type. .. index:: pair: dtype; scalar @@ -42,24 +42,28 @@ needed in Numpy. pair: dtype; field pair: dtype; record -Record data types are formed by creating a data type whose +Struct data types are formed by creating a data type whose :term:`fields` contain other data types. Each field has a name by which it can be :ref:`accessed <arrays.indexing.rec>`. The parent data type should be of sufficient size to contain all its fields; the -parent can for example be based on the :class:`void` type which allows -an arbitrary item size. Record data types may also contain other record -types and fixed-size sub-array data types in their fields. +parent is nearly always based on the :class:`void` type which allows +an arbitrary item size. Struct data types may also contain nested struct +sub-array data types in their fields. .. index:: pair: dtype; sub-array Finally, a data type can describe items that are themselves arrays of items of another data type. These sub-arrays must, however, be of a -fixed size. If an array is created using a data-type describing a -sub-array, the dimensions of the sub-array are appended to the shape +fixed size. + +If an array is created using a data-type describing a sub-array, +the dimensions of the sub-array are appended to the shape of the array when the array is created. Sub-arrays in a field of a record behave differently, see :ref:`arrays.indexing.rec`. +Sub-arrays always have a C-contiguous memory layout. + .. 
admonition:: Example A simple data type containing a 32-bit big-endian integer: @@ -226,7 +230,7 @@ Array-protocol type strings (see :ref:`arrays.interface`) ``'c'`` complex-floating point ``'S'``, ``'a'`` string ``'U'`` unicode - ``'V'`` anything (:class:`void`) + ``'V'`` raw data (:class:`void`) ================ ======================== .. admonition:: Example @@ -243,7 +247,7 @@ String with comma-separated fields A basic format in this context is an optional shape specifier followed by an array-protocol type string. Parenthesis are required - on the shape if it is greater than 1-d. NumPy allows a modification + on the shape if it has more than one dimension. NumPy allows a modification on the format in that any string that can uniquely identify the type can be used to specify the data-type in a field. The generated data-type fields are named ``'f0'``, ``'f1'``, ..., @@ -283,8 +287,8 @@ Type strings ``(flexible_dtype, itemsize)`` The first argument must be an object that is converted to a - flexible data-type object (one whose element size is 0), the - second argument is an integer providing the desired itemsize. + zero-sized flexible data-type object, the second argument is + an integer providing the desired itemsize. .. admonition:: Example @@ -309,30 +313,6 @@ Type strings >>> dt = np.dtype(('S10', 1)) # 10-character string >>> dt = np.dtype(('i4, (2,3)f8, f4', (2,3))) # 2 x 3 record sub-array -``(base_dtype, new_dtype)`` - - Both arguments must be convertible to data-type objects in this - case. The *base_dtype* is the data-type object that the new - data-type builds on. This is how you could assign named fields to - any built-in data-type object. - - .. admonition:: Example - - 32-bit integer, whose first two bytes are interpreted as an integer - via field ``real``, and the following two bytes via field ``imag``. 
- - >>> dt = np.dtype((np.int32,{'real':(np.int16, 0),'imag':(np.int16, 2)}) - - 32-bit integer, which is interpreted as consisting of a sub-array - of shape ``(4,)`` containing 8-bit integers: - - >>> dt = np.dtype((np.int32, (np.int8, 4))) - - 32-bit integer, containing fields ``r``, ``g``, ``b``, ``a`` that - interpret the 4 bytes in the integer as four unsigned integers: - - >>> dt = np.dtype(('i4', [('r','u1'),('g','u1'),('b','u1'),('a','u1')])) - .. index:: triple: dtype; construction; from list @@ -376,15 +356,15 @@ Type strings .. index:: triple: dtype; construction; from dict -``{'names': ..., 'formats': ..., 'offsets': ..., 'titles': ...}`` +``{'names': ..., 'formats': ..., 'offsets': ..., 'titles': ..., 'itemsize': ...}`` - This style has two required and two optional keys. The *names* + This style has two required and three optional keys. The *names* and *formats* keys are required. Their respective values are equal-length lists with the field names and the field formats. The field names must be strings and the field formats can be any object accepted by :class:`dtype` constructor. - The optional keys in the dictionary are *offsets* and *titles* and + When the optional keys *offsets* and *titles* are provided, their values must each be lists of the same length as the *names* and *formats* lists. The *offsets* value is a list of byte offsets (integers) for each field, while the *titles* value is a list of @@ -395,6 +375,11 @@ Type strings field tuple which will contain the title as an additional tuple member. + The *itemsize* key allows the total size of the dtype to be + set, and must be an integer large enough so all the fields + are within the dtype. If the dtype being constructed is aligned, + the *itemsize* must also be divisible by the struct alignment. + .. 
admonition:: Example Data type with fields ``r``, ``g``, ``b``, ``a``, each being @@ -414,6 +399,11 @@ Type strings ``{'field1': ..., 'field2': ..., ...}`` + This usage is discouraged, because it is ambiguous with the + other dict-based construction method. If you have a field + called 'names' and a field called 'formats' there will be + a conflict. + This style allows passing in the :attr:`fields <dtype.fields>` attribute of a data-type object. @@ -429,6 +419,34 @@ Type strings >>> dt = np.dtype({'col1': ('S10', 0), 'col2': (float32, 10), 'col3': (int, 14)}) +``(base_dtype, new_dtype)`` + + This usage is discouraged. In NumPy 1.7 and later, it is possible + to specify struct dtypes with overlapping fields, functioning like + the 'union' type in C. The union mechanism is preferred. + + Both arguments must be convertible to data-type objects in this + case. The *base_dtype* is the data-type object that the new + data-type builds on. This is how you could assign named fields to + any built-in data-type object. + + .. admonition:: Example + + 32-bit integer, whose first two bytes are interpreted as an integer + via field ``real``, and the following two bytes via field ``imag``. + + >>> dt = np.dtype((np.int32,{'real':(np.int16, 0),'imag':(np.int16, 2)})) + + 32-bit integer, which is interpreted as consisting of a sub-array + of shape ``(4,)`` containing 8-bit integers: + + >>> dt = np.dtype((np.int32, (np.int8, 4))) + + 32-bit integer, containing fields ``r``, ``g``, ``b``, ``a`` that + interpret the 4 bytes in the integer as four unsigned integers: + + >>> dt = np.dtype(('i4', [('r','u1'),('g','u1'),('b','u1'),('a','u1')])) + :class:`dtype` ============== diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 58594b15aebf..760d258f2de5 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -5563,7 +5563,8 @@ def luf(lamdaexpr, *args, **kwargs): align : bool, optional Add padding to the fields to match what a C compiler would output for a similar C-struct. 
Can be ``True`` only if `obj` is a dictionary - or a comma-separated string. + or a comma-separated string. If a struct dtype is being created, + this also sets a sticky alignment flag ``isalignedstruct``. copy : bool, optional Make a new copy of the data-type object. If ``False``, the result may just be a reference to a built-in data-type object. @@ -5787,6 +5788,14 @@ def luf(lamdaexpr, *args, **kwargs): """)) +add_newdoc('numpy.core.multiarray', 'dtype', ('isalignedstruct', + """ + Boolean indicating whether the dtype is a struct which maintains + field alignment. This flag is sticky, so when combining multiple + structs together, it is preserved and produces new dtypes which + are also aligned. + """)) + add_newdoc('numpy.core.multiarray', 'dtype', ('itemsize', """ The element size of this data-type object. diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index a5b6d117a6a3..99e64d47558d 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -12,7 +12,7 @@ else: _nbo = asbytes('>') -def _makenames_list(adict): +def _makenames_list(adict, align): from multiarray import dtype allfields = [] fnames = adict.keys() @@ -26,7 +26,7 @@ def _makenames_list(adict): num = int(obj[1]) if (num < 0): raise ValueError("invalid offset.") - format = dtype(obj[0]) + format = dtype(obj[0], align=align) if (format.itemsize == 0): raise ValueError("all itemsizes must be fixed.") if (n > 2): @@ -53,7 +53,7 @@ def _usefields(adict, align): except KeyError: names = None if names is None: - names, formats, offsets, titles = _makenames_list(adict) + names, formats, offsets, titles = _makenames_list(adict, align) else: formats = [] offsets = [] diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 701afee82240..7728b4b9c755 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -502,7 +502,8 @@ typedef struct { #define NPY_USE_GETITEM 0x20 /* Use f.setitem when 
setting creating 0-d array from this data-type.*/ #define NPY_USE_SETITEM 0x40 -/* define NPY_IS_COMPLEX */ +/* A sticky flag specifically for structured arrays */ +#define NPY_ALIGNED_STRUCT 0x80 /* *These are inherited for global data-type if any data-types in the @@ -1344,6 +1345,7 @@ PyArrayNeighborhoodIter_Next2D(PyArrayNeighborhoodIterObject* iter); #define PyDataType_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(((PyArray_Descr*)(obj))->type_num) #define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num) #define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL) +#define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL) #define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj)) #define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj)) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 28ce3360522a..3a0f22743fe2 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -544,7 +544,7 @@ VOID_getitem(char *ip, PyArrayObject *ap) int itemsize; descr = ap->descr; - if (descr->names != NULL) { + if (PyDataType_HASFIELDS(descr)) { PyObject *key; PyObject *names; int i, n; @@ -3302,10 +3302,14 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_FillWithScalarFunc*)NULL, #if @sort@ { - quicksort_@suff@, heapsort_@suff@, mergesort_@suff@ + (PyArray_SortFunc *)quicksort_@suff@, + (PyArray_SortFunc *)heapsort_@suff@, + (PyArray_SortFunc *)mergesort_@suff@ }, { - aquicksort_@suff@, aheapsort_@suff@, amergesort_@suff@ + (PyArray_ArgSortFunc *)aquicksort_@suff@, + (PyArray_ArgSortFunc *)aheapsort_@suff@, + (PyArray_ArgSortFunc *)amergesort_@suff@ }, #else { @@ -3406,10 +3410,14 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_FillWithScalarFunc*)@from@_fillwithscalar, #if @sort@ { - quicksort_@suff@, heapsort_@suff@, mergesort_@suff@ + (PyArray_SortFunc *)quicksort_@suff@, + 
(PyArray_SortFunc *)heapsort_@suff@, + (PyArray_SortFunc *)mergesort_@suff@ }, { - aquicksort_@suff@, aheapsort_@suff@, amergesort_@suff@ + (PyArray_ArgSortFunc *)aquicksort_@suff@, + (PyArray_ArgSortFunc *)aheapsort_@suff@, + (PyArray_ArgSortFunc *)amergesort_@suff@ }, #else { diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 6016917115c4..222ab9dd60a7 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -15,9 +15,19 @@ #include "_datetime.h" #include "common.h" +#include "descriptor.h" -#define _chk_byteorder(arg) (arg == '>' || arg == '<' || \ - arg == '|' || arg == '=') +/* + * offset: A starting offset. + * alignment: A power-of-two alignment. + * + * This macro returns the smallest value >= 'offset' + * which is divisible by 'alignment'. Because 'alignment' + * is a power of two, and integers are twos-complement, + * can use some simple bit-fiddling to do this. + */ +#define NPY_NEXT_ALIGNED_OFFSET(offset, alignment) \ + (((offset) + (alignment) - 1) & (-(alignment))) static PyObject *typeDict = NULL; /* Must be explicitly loaded */ @@ -102,6 +112,9 @@ array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args) return Py_None; } +#define _chk_byteorder(arg) (arg == '>' || arg == '<' || \ + arg == '|' || arg == '=') + static int _check_for_commastring(char *type, Py_ssize_t len) { @@ -135,6 +148,8 @@ _check_for_commastring(char *type, Py_ssize_t len) return 0; } +#undef _chk_byteorder + static int is_datetime_typestr(char *type, Py_ssize_t len) { @@ -159,10 +174,6 @@ is_datetime_typestr(char *type, Py_ssize_t len) return 0; } - - -#undef _chk_byteorder - static PyArray_Descr * _convert_from_tuple(PyObject *obj) { @@ -362,14 +373,25 @@ _convert_from_array_descr(PyObject *obj, int align) /* Process rest */ if (PyTuple_GET_SIZE(item) == 2) { - ret = PyArray_DescrConverter(PyTuple_GET_ITEM(item, 1), &conv); + if (align) { + ret = 
PyArray_DescrAlignConverter(PyTuple_GET_ITEM(item, 1), + &conv); + } + else { + ret = PyArray_DescrConverter(PyTuple_GET_ITEM(item, 1), &conv); + } if (ret == PY_FAIL) { PyObject_Print(PyTuple_GET_ITEM(item, 1), stderr, 0); } } else if (PyTuple_GET_SIZE(item) == 3) { newobj = PyTuple_GetSlice(item, 1, 3); - ret = PyArray_DescrConverter(newobj, &conv); + if (align) { + ret = PyArray_DescrAlignConverter(newobj, &conv); + } + else { + ret = PyArray_DescrConverter(newobj, &conv); + } Py_DECREF(newobj); } else { @@ -379,15 +401,13 @@ _convert_from_array_descr(PyObject *obj, int align) goto fail; } if ((PyDict_GetItem(fields, name) != NULL) -#if defined(NPY_PY3K) || (title +#if defined(NPY_PY3K) && PyUString_Check(title) - && (PyDict_GetItem(fields, title) != NULL))) { #else - || (title && (PyUString_Check(title) || PyUnicode_Check(title)) - && (PyDict_GetItem(fields, title) != NULL))) { #endif + && (PyDict_GetItem(fields, title) != NULL))) { PyErr_SetString(PyExc_ValueError, "two fields with the same name"); goto fail; @@ -400,7 +420,7 @@ _convert_from_array_descr(PyObject *obj, int align) _align = conv->alignment; if (_align > 1) { - totalsize = ((totalsize + _align - 1)/_align)*_align; + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, _align); } maxalign = MAX(maxalign, _align); } @@ -433,15 +453,25 @@ _convert_from_array_descr(PyObject *obj, int align) totalsize += conv->elsize; Py_DECREF(tup); } + + if (maxalign > 1) { + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, maxalign); + } + new = PyArray_DescrNewFromType(PyArray_VOID); + if (new == NULL) { + Py_XDECREF(fields); + Py_XDECREF(nameslist); + return NULL; + } new->fields = fields; new->names = nameslist; new->elsize = totalsize; new->flags=dtypeflags; - if (maxalign > 1) { - totalsize = ((totalsize + maxalign - 1)/maxalign)*maxalign; - } + + /* Structured arrays get a sticky aligned bit */ if (align) { + new->flags |= NPY_ALIGNED_STRUCT; new->alignment = maxalign; } return new; @@ -494,7 +524,12 @@ 
_convert_from_list(PyObject *obj, int align) for (i = 0; i < n; i++) { tup = PyTuple_New(2); key = PyUString_FromFormat("f%d", i); - ret = PyArray_DescrConverter(PyList_GET_ITEM(obj, i), &conv); + if (align) { + ret = PyArray_DescrAlignConverter(PyList_GET_ITEM(obj, i), &conv); + } + else { + ret = PyArray_DescrConverter(PyList_GET_ITEM(obj, i), &conv); + } if (ret == PY_FAIL) { Py_DECREF(tup); Py_DECREF(key); @@ -507,7 +542,7 @@ _convert_from_list(PyObject *obj, int align) _align = conv->alignment; if (_align > 1) { - totalsize = ((totalsize + _align - 1)/_align)*_align; + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, _align); } maxalign = MAX(maxalign, _align); } @@ -520,11 +555,13 @@ _convert_from_list(PyObject *obj, int align) new = PyArray_DescrNewFromType(PyArray_VOID); new->fields = fields; new->names = nameslist; - new->flags=dtypeflags; + new->flags = dtypeflags; if (maxalign > 1) { - totalsize = ((totalsize+maxalign-1)/maxalign)*maxalign; + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, maxalign); } + /* Structured arrays get a sticky aligned bit */ if (align) { + new->flags |= NPY_ALIGNED_STRUCT; new->alignment = maxalign; } new->elsize = totalsize; @@ -570,8 +607,10 @@ _convert_from_commastring(PyObject *obj, int align) return NULL; } if (PyList_GET_SIZE(listobj) == 1) { - if (PyArray_DescrConverter( - PyList_GET_ITEM(listobj, 0), &res) == NPY_FAIL) { + int retcode; + retcode = PyArray_DescrConverter(PyList_GET_ITEM(listobj, 0), + &res); + if (retcode == NPY_FAIL) { res = NULL; } } @@ -641,7 +680,7 @@ _use_inherit(PyArray_Descr *type, PyObject *newobj, int *errflag) goto fail; } new->elsize = conv->elsize; - if (conv->names) { + if (PyDataType_HASFIELDS(conv)) { new->fields = conv->fields; Py_XINCREF(new->fields); new->names = conv->names; @@ -657,6 +696,77 @@ _use_inherit(PyArray_Descr *type, PyObject *newobj, int *errflag) return NULL; } +/* + * Validates that any field of the structured array 'dtype' which has + * the NPY_ITEM_HASOBJECT flag set 
does not overlap with another field. + * + * This algorithm is worst case O(n^2). It could be done with a sort + * and sweep algorithm, but the structured dtype representation is + * rather ugly right now, so writing something better can wait until + * that representation is made sane. + * + * Returns 0 on success, -1 if an exception is raised. + */ +static int +validate_object_field_overlap(PyArray_Descr *dtype) +{ + PyObject *names, *fields, *key, *tup, *title; + Py_ssize_t i, j, names_size; + PyArray_Descr *fld_dtype, *fld2_dtype; + int fld_offset, fld2_offset, align; + + /* Get some properties from the dtype */ + names = dtype->names; + names_size = PyTuple_GET_SIZE(names); + fields = dtype->fields; + align = PyDataType_FLAGCHK(dtype, NPY_ALIGNED_STRUCT); + + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + if (key == NULL) { + return -1; + } + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return -1; + } + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) { + return -1; + } + + /* If this field has objects, check for overlaps */ + if (PyDataType_REFCHK(fld_dtype)) { + for (j = 0; j < names_size; ++j) { + if (i != j) { + key = PyTuple_GET_ITEM(names, j); + if (key == NULL) { + return -1; + } + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return -1; + } + if (!PyArg_ParseTuple(tup, "Oi|O", &fld2_dtype, + &fld2_offset, &title)) { + return -1; + } + /* Raise an exception if it overlaps */ + if (fld_offset < fld2_offset + fld2_dtype->elsize && + fld2_offset < fld_offset + fld_dtype->elsize) { + PyErr_SetString(PyExc_TypeError, + "Cannot create a NumPy dtype with overlapping " + "object fields"); + return -1; + } + } + } + } + } + + /* It passed all the overlap tests */ + return 0; +} + /* * a dictionary specifying a data-type * must have at least two and up to four @@ -715,12 +825,13 @@ _convert_from_dict(PyObject *obj, int align) { PyArray_Descr *new; PyObject *fields = NULL; - PyObject *names, 
*offsets, *descrs, *titles; + PyObject *names, *offsets, *descrs, *titles, *tmp; PyObject *metadata; int n, i; - int totalsize; + int totalsize, itemsize; int maxalign = 0; int dtypeflags = 0; + int has_out_of_order_fields = 0; fields = PyDict_New(); if (fields == NULL) { @@ -745,21 +856,21 @@ _convert_from_dict(PyObject *obj, int align) totalsize = 0; for (i = 0; i < n; i++) { - PyObject *tup, *descr, *index, *item, *name, *off; + PyObject *tup, *descr, *index, *title, *name, *off; int len, ret, _align = 1; PyArray_Descr *newdescr; /* Build item to insert (descr, offset, [title])*/ len = 2; - item = NULL; + title = NULL; index = PyInt_FromLong(i); if (titles) { - item=PyObject_GetItem(titles, index); - if (item && item != Py_None) { + title=PyObject_GetItem(titles, index); + if (title && title != Py_None) { len = 3; } else { - Py_XDECREF(item); + Py_XDECREF(title); } PyErr_Clear(); } @@ -768,7 +879,12 @@ _convert_from_dict(PyObject *obj, int align) if (!descr) { goto fail; } - ret = PyArray_DescrConverter(descr, &newdescr); + if (align) { + ret = PyArray_DescrAlignConverter(descr, &newdescr); + } + else { + ret = PyArray_DescrConverter(descr, &newdescr); + } Py_DECREF(descr); if (ret == PY_FAIL) { Py_DECREF(tup); @@ -788,23 +904,32 @@ _convert_from_dict(PyObject *obj, int align) } offset = PyInt_AsLong(off); PyTuple_SET_ITEM(tup, 1, off); + /* Flag whether the fields are specified out of order */ if (offset < totalsize) { - PyErr_SetString(PyExc_ValueError, - "invalid offset (must be ordered)"); + has_out_of_order_fields = 1; + } + /* If align=True, enforce field alignment */ + if (align && offset % newdescr->alignment != 0) { + PyErr_Format(PyExc_ValueError, + "offset %d for NumPy dtype with fields is " + "not divisible by the field alignment %d " + "with align=True", + (int)offset, (int)newdescr->alignment); ret = PY_FAIL; } - if (offset > totalsize) { - totalsize = offset; + else if (offset + newdescr->elsize > totalsize) { + totalsize = offset + 
newdescr->elsize; } } else { if (align && _align > 1) { - totalsize = ((totalsize + _align - 1)/_align)*_align; + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, _align); } PyTuple_SET_ITEM(tup, 1, PyInt_FromLong(totalsize)); + totalsize += newdescr->elsize; } if (len == 3) { - PyTuple_SET_ITEM(tup, 2, item); + PyTuple_SET_ITEM(tup, 2, title); } name = PyObject_GetItem(names, index); if (!name) { @@ -831,16 +956,16 @@ _convert_from_dict(PyObject *obj, int align) Py_DECREF(name); if (len == 3) { #if defined(NPY_PY3K) - if (PyUString_Check(item)) { + if (PyUString_Check(title)) { #else - if (PyUString_Check(item) || PyUnicode_Check(item)) { + if (PyUString_Check(title) || PyUnicode_Check(title)) { #endif - if (PyDict_GetItem(fields, item) != NULL) { + if (PyDict_GetItem(fields, title) != NULL) { PyErr_SetString(PyExc_ValueError, "title already used as a name or title."); ret=PY_FAIL; } - PyDict_SetItem(fields, item, tup); + PyDict_SetItem(fields, title, tup); } } Py_DECREF(tup); @@ -848,15 +973,14 @@ _convert_from_dict(PyObject *obj, int align) goto fail; } dtypeflags |= (newdescr->flags & NPY_FROM_FIELDS); - totalsize += newdescr->elsize; } - new = PyArray_DescrNewFromType(PyArray_VOID); + new = PyArray_DescrNewFromType(NPY_VOID); if (new == NULL) { goto fail; } if (maxalign > 1) { - totalsize = ((totalsize + maxalign - 1)/maxalign)*maxalign; + totalsize = NPY_NEXT_ALIGNED_OFFSET(totalsize, maxalign); } if (align) { new->alignment = maxalign; @@ -872,6 +996,53 @@ _convert_from_dict(PyObject *obj, int align) new->fields = fields; new->flags = dtypeflags; + /* + * If the fields weren't in order, and there was an OBJECT type, + * need to verify that no OBJECT types overlap with something else. 
+ */ + if (has_out_of_order_fields && PyDataType_REFCHK(new)) { + if (validate_object_field_overlap(new) < 0) { + Py_DECREF(new); + return NULL; + } + } + + /* Structured arrays get a sticky aligned bit */ + if (align) { + new->flags |= NPY_ALIGNED_STRUCT; + } + + /* Override the itemsize if provided */ + tmp = PyDict_GetItemString(obj, "itemsize"); + if (tmp != NULL) { + itemsize = (int)PyInt_AsLong(tmp); + if (itemsize == -1 && PyErr_Occurred()) { + Py_DECREF(new); + return NULL; + } + /* Make sure the itemsize isn't made too small */ + if (itemsize < new->elsize) { + PyErr_Format(PyExc_ValueError, + "NumPy dtype descriptor requires %d bytes, " + "cannot override to smaller itemsize of %d", + (int)new->elsize, (int)itemsize); + Py_DECREF(new); + return NULL; + } + /* If align is set, make sure the alignment divides into the size */ + if (align && itemsize % new->alignment != 0) { + PyErr_Format(PyExc_ValueError, + "NumPy dtype descriptor requires alignment of %d bytes, " + "which is not divisible into the specified itemsize %d", + (int)new->alignment, (int)itemsize); + Py_DECREF(new); + return NULL; + } + /* Set the itemsize */ + new->elsize = itemsize; + } + + /* Add the metadata if provided */ metadata = PyDict_GetItemString(obj, "metadata"); if (new->metadata == NULL) { @@ -1326,7 +1497,7 @@ static PyMemberDef arraydescr_members[] = { static PyObject * arraydescr_subdescr_get(PyArray_Descr *self) { - if (self->subarray == NULL) { + if (!PyDataType_HASSUBARRAY(self)) { Py_INCREF(Py_None); return Py_None; } @@ -1422,7 +1593,7 @@ arraydescr_typename_get(PyArray_Descr *self) static PyObject * arraydescr_base_get(PyArray_Descr *self) { - if (self->subarray == NULL) { + if (!PyDataType_HASSUBARRAY(self)) { Py_INCREF(self); return (PyObject *)self; } @@ -1433,7 +1604,7 @@ arraydescr_base_get(PyArray_Descr *self) static PyObject * arraydescr_shape_get(PyArray_Descr *self) { - if (self->subarray == NULL) { + if (!PyDataType_HASSUBARRAY(self)) { return PyTuple_New(0); } 
/*TODO @@ -1453,7 +1624,7 @@ arraydescr_protocol_descr_get(PyArray_Descr *self) PyObject *dobj, *res; PyObject *_numpy_internal; - if (self->names == NULL) { + if (!PyDataType_HASFIELDS(self)) { /* get default */ dobj = PyTuple_New(2); if (dobj == NULL) { @@ -1501,7 +1672,7 @@ arraydescr_isbuiltin_get(PyArray_Descr *self) static int _arraydescr_isnative(PyArray_Descr *self) { - if (self->names == NULL) { + if (!PyDataType_HASFIELDS(self)) { return PyArray_ISNBO(self->byteorder); } else { @@ -1545,10 +1716,19 @@ arraydescr_isnative_get(PyArray_Descr *self) return ret; } +static PyObject * +arraydescr_isalignedstruct_get(PyArray_Descr *self) +{ + PyObject *ret; + ret = (self->flags&NPY_ALIGNED_STRUCT) ? Py_True : Py_False; + Py_INCREF(ret); + return ret; +} + static PyObject * arraydescr_fields_get(PyArray_Descr *self) { - if (self->names == NULL) { + if (!PyDataType_HASFIELDS(self)) { Py_INCREF(Py_None); return Py_None; } @@ -1579,7 +1759,7 @@ arraydescr_hasobject_get(PyArray_Descr *self) static PyObject * arraydescr_names_get(PyArray_Descr *self) { - if (self->names == NULL) { + if (!PyDataType_HASFIELDS(self)) { Py_INCREF(Py_None); return Py_None; } @@ -1594,12 +1774,18 @@ arraydescr_names_set(PyArray_Descr *self, PyObject *val) int i; PyObject *new_names; PyObject *new_fields; - if (self->names == NULL) { + + if (!PyDataType_HASFIELDS(self)) { PyErr_SetString(PyExc_ValueError, "there are no fields defined"); return -1; } + if (DEPRECATE("Setting NumPy dtype names is deprecated, the dtype " + "will become immutable in a future version") < 0) { + return -1; + } + N = PyTuple_GET_SIZE(self->names); if (!PySequence_Check(val) || PyObject_Size((PyObject *)val) != N) { PyErr_Format(PyExc_ValueError, @@ -1683,6 +1869,9 @@ static PyGetSetDef arraydescr_getsets[] = { {"isnative", (getter)arraydescr_isnative_get, NULL, NULL, NULL}, + {"isalignedstruct", + (getter)arraydescr_isalignedstruct_get, + NULL, NULL, NULL}, {"fields", (getter)arraydescr_fields_get, NULL, NULL, 
NULL}, @@ -1717,25 +1906,29 @@ _invalid_metadata_check(PyObject *metadata) } static PyObject * -arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *kwds) +arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), + PyObject *args, PyObject *kwds) { - PyObject *odescr, *ometadata=NULL; + PyObject *odescr, *metadata=NULL; PyArray_Descr *descr, *conv; - Bool align = FALSE; - Bool copy = FALSE; - Bool copied = FALSE; + npy_bool align = FALSE; + npy_bool copy = FALSE; + npy_bool copied = FALSE; + static char *kwlist[] = {"dtype", "align", "copy", "metadata", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&O&O!", kwlist, - &odescr, PyArray_BoolConverter, &align, + &odescr, + PyArray_BoolConverter, &align, PyArray_BoolConverter, &copy, - &PyDict_Type, &ometadata)) { + &PyDict_Type, &metadata)) { return NULL; } - if ((ometadata != NULL) && (_invalid_metadata_check(ometadata))) { + if ((metadata != NULL) && (_invalid_metadata_check(metadata))) { return NULL; } + if (align) { if (!PyArray_DescrAlignConverter(odescr, &conv)) { return NULL; @@ -1744,6 +1937,7 @@ arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *kwds else if (!PyArray_DescrConverter(odescr, &conv)) { return NULL; } + /* Get a new copy of it unless it's already a copy */ if (copy && conv->fields == Py_None) { descr = PyArray_DescrNew(conv); @@ -1752,20 +1946,22 @@ arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *kwds copied = TRUE; } - if ((ometadata != NULL)) { + if ((metadata != NULL)) { /* * We need to be sure to make a new copy of the data-type and any * underlying dictionary */ if (!copied) { + copied = TRUE; descr = PyArray_DescrNew(conv); Py_DECREF(conv); conv = descr; } if ((conv->metadata != NULL)) { /* - * Make a copy of the metadata before merging with ometadata - * so that this data-type descriptor has it's own copy + * Make a copy of the metadata before merging with the + * input metadata so that this data-type descriptor has 
+ * it's own copy */ /* Save a reference */ odescr = conv->metadata; @@ -1777,14 +1973,14 @@ arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *kwds * Update conv->metadata with anything new in metadata * keyword, but do not over-write anything already there */ - if (PyDict_Merge(conv->metadata, ometadata, 0) != 0) { + if (PyDict_Merge(conv->metadata, metadata, 0) != 0) { Py_DECREF(conv); return NULL; } } else { /* Make a copy of the input dictionary */ - conv->metadata = PyDict_Copy(ometadata); + conv->metadata = PyDict_Copy(metadata); } } @@ -1926,7 +2122,7 @@ arraydescr_reduce(PyArray_Descr *self, PyObject *NPY_UNUSED(args)) PyTuple_SET_ITEM(state, 1, PyUString_FromFormat("%c", endian)); PyTuple_SET_ITEM(state, 2, arraydescr_subdescr_get(self)); - if (self->names) { + if (PyDataType_HASFIELDS(self)) { Py_INCREF(self->names); Py_INCREF(self->fields); PyTuple_SET_ITEM(state, 3, self->names); @@ -2184,7 +2380,7 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args) } self->subarray = PyArray_malloc(sizeof(PyArray_ArrayDescr)); - if (self->subarray == NULL) { + if (!PyDataType_HASSUBARRAY(self)) { return PyErr_NoMemory(); } self->subarray->base = (PyArray_Descr *)PyTuple_GET_ITEM(subarray, 0); @@ -2366,7 +2562,7 @@ PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian) new->byteorder = newendian; } } - if (new->names) { + if (PyDataType_HASFIELDS(new)) { PyObject *newfields; PyObject *key, *value; PyObject *newvalue; @@ -2408,7 +2604,7 @@ PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian) Py_DECREF(new->fields); new->fields = newfields; } - if (new->subarray) { + if (PyDataType_HASSUBARRAY(new)) { Py_DECREF(new->subarray->base); new->subarray->base = PyArray_DescrNewByteorder( self->subarray->base, newendian); @@ -2443,95 +2639,490 @@ static PyMethodDef arraydescr_methods[] = { {NULL, NULL, 0, NULL} /* sentinel */ }; -static PyObject * -arraydescr_str(PyArray_Descr *self) +/* + * Checks whether the structured data 
type in 'dtype' + * has a simple layout, where all the fields are in order, + * follow the alignment based on the NPY_ALIGNED_STRUCT flag, + * and the total length ends just after the last field with + * appropriate alignment padding. + * + * When this returns true, the dtype can be reconstructed + * from a list of the field names and dtypes, and an + * alignment parameter. + * + * Returns 1 if it has a simple layout, 0 otherwise. + */ +static int +is_dtype_struct_simple_layout(PyArray_Descr *dtype) { - PyObject *sub; + PyObject *names, *fields, *key, *tup, *title; + Py_ssize_t i, names_size; + PyArray_Descr *fld_dtype; + int fld_offset, align; + npy_intp total_offset; + + /* Get some properties from the dtype */ + names = dtype->names; + names_size = PyTuple_GET_SIZE(names); + fields = dtype->fields; + align = PyDataType_FLAGCHK(dtype, NPY_ALIGNED_STRUCT); + + /* Start at offset zero */ + total_offset = 0; + + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + if (key == NULL) { + return 0; + } + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return 0; + } + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) { + PyErr_Clear(); + return 0; + } + /* If it's an aligned struct, apply the dtype alignment */ + if (align) { + /* + * Alignment is always a power of 2, so -alignment is + * a bitmask which preserves everything but the undesired + * bits. + */ + total_offset = (total_offset + fld_dtype->alignment - 1) & + (-fld_dtype->alignment); + } + /* If this field doesn't follow the pattern, not a simple layout */ + if (total_offset != fld_offset) { + return 0; + } + /* Get the next offset */ + total_offset += fld_dtype->elsize; + } + + /* If it's an aligned struct, apply the struct-level alignment */ + if (align) { + /* + * Alignment is always a power of 2, so -alignment is + * a bitmask which preserves everything but the undesired + * bits. 
+ */ + total_offset = (total_offset + dtype->alignment - 1) & + (-dtype->alignment); + } + + /* + * If the itemsize doesn't match the final aligned offset, it's + * not a simple layout. + */ + if (total_offset != dtype->elsize) { + return 0; + } - if (self->names) { - PyObject *lst; - lst = arraydescr_protocol_descr_get(self); - if (!lst) { - sub = PyUString_FromString(""); + /* It's a simple layout, since all the above tests passed */ + return 1; +} + +/* + * Returns a string representation of a structured array, + * in a list format. + */ +static PyObject * +arraydescr_struct_list_str(PyArray_Descr *dtype) +{ + PyObject *names, *key, *fields, *ret, *tmp, *tup, *title; + Py_ssize_t i, names_size; + PyArray_Descr *fld_dtype; + int fld_offset; + + names = dtype->names; + names_size = PyTuple_GET_SIZE(names); + fields = dtype->fields; + + /* Build up a string to make the list */ + + /* Go through all the names */ + ret = PyUString_FromString("["); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return 0; + } + title = NULL; + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) { PyErr_Clear(); + return 0; + } + PyUString_ConcatAndDel(&ret, PyUString_FromString("(")); + /* Check for whether to do titles as well */ + if (title != NULL && title != Py_None) { + PyUString_ConcatAndDel(&ret, PyUString_FromString("(")); + PyUString_ConcatAndDel(&ret, PyObject_Repr(title)); + PyUString_ConcatAndDel(&ret, PyUString_FromString(", ")); + PyUString_ConcatAndDel(&ret, PyObject_Repr(key)); + PyUString_ConcatAndDel(&ret, PyUString_FromString("), ")); } else { - sub = PyObject_Str(lst); - } - Py_XDECREF(lst); - if (self->type_num != PyArray_VOID) { - PyObject *p, *t; - t=PyUString_FromString("'"); - p = arraydescr_protocol_typestr_get(self); - PyUString_Concat(&p, t); - PyUString_ConcatAndDel(&t, p); - p = PyUString_FromString("("); - PyUString_ConcatAndDel(&p, t); - 
PyUString_ConcatAndDel(&p, PyUString_FromString(", ")); - PyUString_ConcatAndDel(&p, sub); - PyUString_ConcatAndDel(&p, PyUString_FromString(")")); - sub = p; - } - } - else if (self->subarray) { - PyObject *p; - PyObject *t = PyUString_FromString("("); - PyObject *sh; - p = arraydescr_str(self->subarray->base); - if (!self->subarray->base->names && !self->subarray->base->subarray) { - PyObject *tmp=PyUString_FromString("'"); - PyUString_Concat(&p, tmp); - PyUString_ConcatAndDel(&tmp, p); - p = tmp; - } - PyUString_ConcatAndDel(&t, p); - PyUString_ConcatAndDel(&t, PyUString_FromString(",")); - /*TODO - * self->subarray->shape should always be a tuple, - * so this check should be unnecessary - */ - if (!PyTuple_Check(self->subarray->shape)) { - sh = Py_BuildValue("(O)", self->subarray->shape); + PyUString_ConcatAndDel(&ret, PyObject_Repr(key)); + PyUString_ConcatAndDel(&ret, PyUString_FromString(", ")); + } + /* Special case subarray handling here */ + if (PyDataType_HASSUBARRAY(fld_dtype)) { + tmp = arraydescr_short_construction_repr( + fld_dtype->subarray->base); + PyUString_ConcatAndDel(&ret, tmp); + PyUString_ConcatAndDel(&ret, PyUString_FromString(", ")); + PyUString_ConcatAndDel(&ret, + PyObject_Str(fld_dtype->subarray->shape)); } else { - sh = self->subarray->shape; - Py_INCREF(sh); + tmp = arraydescr_short_construction_repr(fld_dtype); + PyUString_ConcatAndDel(&ret, tmp); + } + PyUString_ConcatAndDel(&ret, PyUString_FromString(")")); + if (i != names_size - 1) { + PyUString_ConcatAndDel(&ret, PyUString_FromString(", ")); } - PyUString_ConcatAndDel(&t, PyObject_Str(sh)); - Py_DECREF(sh); - PyUString_ConcatAndDel(&t, PyUString_FromString(")")); - sub = t; } - else if (PyDataType_ISFLEXIBLE(self) || !PyArray_ISNBO(self->byteorder)) { - sub = arraydescr_protocol_typestr_get(self); + PyUString_ConcatAndDel(&ret, PyUString_FromString("]")); + + return ret; +} + +/* + * Returns a string representation of a structured array, + * in a dict format. 
+ */ +static PyObject * +arraydescr_struct_dict_str(PyArray_Descr *dtype) +{ + PyObject *names, *key, *fields, *ret, *tmp, *tup, *title; + Py_ssize_t i, names_size; + PyArray_Descr *fld_dtype; + int fld_offset, has_titles; + int align, naturalsize; + + names = dtype->names; + names_size = PyTuple_GET_SIZE(names); + fields = dtype->fields; + has_titles = 0; + + /* Used to determine whether the 'itemsize=' is needed */ + align = (dtype->flags&NPY_ALIGNED_STRUCT) != 0; + naturalsize = 0; + + /* Build up a string to make the dictionary */ + + /* First, the names */ + ret = PyUString_FromString("{'names':["); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + PyUString_ConcatAndDel(&ret, PyObject_Repr(key)); + if (i != names_size - 1) { + PyUString_ConcatAndDel(&ret, PyUString_FromString(",")); + } + } + /* Second, the formats */ + PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'formats':[")); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return 0; + } + title = NULL; + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) { + PyErr_Clear(); + return 0; + } + /* Check for whether to do titles as well */ + if (title != NULL && title != Py_None) { + has_titles = 1; + } + tmp = arraydescr_short_construction_repr(fld_dtype); + PyUString_ConcatAndDel(&ret, tmp); + if (i != names_size - 1) { + PyUString_ConcatAndDel(&ret, PyUString_FromString(",")); + } + } + /* Third, the offsets */ + PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'offsets':[")); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return 0; + } + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, &fld_offset, &title)) { + PyErr_Clear(); + return 0; + } + PyUString_ConcatAndDel(&ret, PyUString_FromFormat("%d", fld_offset)); + if (i != names_size - 1) { + PyUString_ConcatAndDel(&ret, 
PyUString_FromString(",")); + } + /* Accumulate the natural size of the dtype */ + if (fld_offset + fld_dtype->elsize > naturalsize) { + naturalsize = fld_offset + fld_dtype->elsize; + } + } + /* Fourth, the titles */ + if (has_titles) { + PyUString_ConcatAndDel(&ret, PyUString_FromString("], 'titles':[")); + for (i = 0; i < names_size; ++i) { + key = PyTuple_GET_ITEM(names, i); + tup = PyDict_GetItem(fields, key); + if (tup == NULL) { + return 0; + } + title = Py_None; + if (!PyArg_ParseTuple(tup, "Oi|O", &fld_dtype, + &fld_offset, &title)) { + PyErr_Clear(); + return 0; + } + PyUString_ConcatAndDel(&ret, PyObject_Repr(title)); + if (i != names_size - 1) { + PyUString_ConcatAndDel(&ret, PyUString_FromString(",")); + } + } + } + /* The alignment is always a power of 2, so this works */ + if (align) { + naturalsize = NPY_NEXT_ALIGNED_OFFSET(naturalsize, dtype->alignment); + } + /* Finally, the itemsize */ + if (naturalsize == dtype->elsize) { + PyUString_ConcatAndDel(&ret, PyUString_FromString("]}")); } else { - sub = arraydescr_typename_get(self); + PyUString_ConcatAndDel(&ret, + PyUString_FromFormat("], 'itemsize':%d}", (int)dtype->elsize)); + } + + return ret; +} + +/* Produces a string representation for a structured dtype */ +static PyObject * +arraydescr_struct_str(PyArray_Descr *dtype) +{ + if (is_dtype_struct_simple_layout(dtype)) { + return arraydescr_struct_list_str(dtype); + } + else { + return arraydescr_struct_dict_str(dtype); + } +} + +/* Produces a string representation for a subarray dtype */ +static PyObject * +arraydescr_subarray_str(PyArray_Descr *dtype) +{ + PyObject *p, *ret; + + ret = PyUString_FromString("("); + p = arraydescr_short_construction_repr(dtype->subarray->base); + PyUString_ConcatAndDel(&ret, p); + PyUString_ConcatAndDel(&ret, PyUString_FromString(", ")); + PyUString_ConcatAndDel(&ret, PyObject_Str(dtype->subarray->shape)); + PyUString_ConcatAndDel(&ret, PyUString_FromString(")")); + + return ret; +} + +static PyObject * 
+arraydescr_str(PyArray_Descr *dtype) +{ + PyObject *sub; + + if (PyDataType_HASFIELDS(dtype)) { + sub = arraydescr_struct_str(dtype); + } + else if (PyDataType_HASSUBARRAY(dtype)) { + sub = arraydescr_subarray_str(dtype); + } + else if (PyDataType_ISFLEXIBLE(dtype) || !PyArray_ISNBO(dtype->byteorder)) { + sub = arraydescr_protocol_typestr_get(dtype); + } + else { + sub = arraydescr_typename_get(dtype); } return sub; } +/* + * The dtype repr function specifically for structured arrays. + */ static PyObject * -arraydescr_repr(PyArray_Descr *self) +arraydescr_struct_repr(PyArray_Descr *dtype) { PyObject *sub, *s; + s = PyUString_FromString("dtype("); - sub = arraydescr_str(self); + sub = arraydescr_struct_str(dtype); if (sub == NULL) { - return sub; - } - if (!self->names && !self->subarray) { - PyObject *t=PyUString_FromString("'"); - PyUString_Concat(&sub, t); - PyUString_ConcatAndDel(&t, sub); - sub = t; + return NULL; } + PyUString_ConcatAndDel(&s, sub); - sub = PyUString_FromString(")"); - PyUString_ConcatAndDel(&s, sub); + + /* If it's an aligned structure, add the align=True parameter */ + if (dtype->flags&NPY_ALIGNED_STRUCT) { + PyUString_ConcatAndDel(&s, PyUString_FromString(", align=True")); + } + + PyUString_ConcatAndDel(&s, PyUString_FromString(")")); return s; } +/* + * This creates a shorter repr using the 'kind' and 'itemsize', + * instead of the longer type name. This is the object you pass + * as the first parameter to the dtype constructor. + * + * This does not preserve the 'align=True' parameter + * for structured arrays like the regular repr does, because + * this flag is separate from the first dtype constructor parameter. 
+ */ +NPY_NO_EXPORT PyObject * +arraydescr_short_construction_repr(PyArray_Descr *dtype) +{ + PyObject *ret; + PyArray_DatetimeMetaData *meta; + char byteorder[2]; + + if (PyDataType_HASFIELDS(dtype)) { + return arraydescr_struct_str(dtype); + } + else if (PyDataType_HASSUBARRAY(dtype)) { + return arraydescr_subarray_str(dtype); + } + + /* Normalize byteorder to '<' or '>' */ + switch (dtype->byteorder) { + case NPY_NATIVE: + byteorder[0] = NPY_NATBYTE; + break; + case NPY_SWAP: + byteorder[0] = NPY_OPPBYTE; + break; + case NPY_IGNORE: + byteorder[0] = '\0'; + break; + default: + byteorder[0] = dtype->byteorder; + break; + } + byteorder[1] = '\0'; + + /* Handle booleans, numbers, and custom dtypes */ + if (dtype->type_num == NPY_BOOL) { + return PyUString_FromString("'?'"); + } + else if (PyTypeNum_ISNUMBER(dtype->type_num)) { + return PyUString_FromFormat("'%s%c%d'", byteorder, (int)dtype->kind, + dtype->elsize); + } + else if (PyTypeNum_ISUSERDEF(dtype->type_num)) { + char *s = strrchr(dtype->typeobj->tp_name, '.'); + if (s == NULL) { + return PyUString_FromString(dtype->typeobj->tp_name); + } + else { + return PyUString_FromStringAndSize(s + 1, strlen(s) - 1); + } + } + + /* All the rest which don't fit in the same pattern */ + switch (dtype->type_num) { + /* + * The object reference may be different sizes on different + * platforms, so it should never include the itemsize here. 
+ */ + case NPY_OBJECT: + return PyUString_FromString("'O'"); + + case NPY_STRING: + if (dtype->elsize == 0) { + return PyUString_FromString("'S'"); + } + else { + return PyUString_FromFormat("'S%d'", (int)dtype->elsize); + } + + case NPY_UNICODE: + if (dtype->elsize == 0) { + return PyUString_FromFormat("'%sU'", byteorder); + } + else { + return PyUString_FromFormat("'%sU%d'", byteorder, + (int)dtype->elsize / 4); + } + + case NPY_VOID: + if (dtype->elsize == 0) { + return PyUString_FromString("'V'"); + } + else { + return PyUString_FromFormat("'V%d'", (int)dtype->elsize); + } + + case NPY_DATETIME: + meta = get_datetime_metadata_from_dtype(dtype); + if (meta == NULL) { + return NULL; + } + ret = PyUString_FromFormat("%sM8", byteorder); + return append_metastr_to_string(meta, 0, ret); + + case NPY_TIMEDELTA: + meta = get_datetime_metadata_from_dtype(dtype); + if (meta == NULL) { + return NULL; + } + ret = PyUString_FromFormat("%sm8", byteorder); + return append_metastr_to_string(meta, 0, ret); + + default: + PyErr_SetString(PyExc_RuntimeError, "Internal error: NumPy dtype " + "unrecognized type number"); + return NULL; + } +} + +/* + * The general dtype repr function. 
+ */ +static PyObject * +arraydescr_repr(PyArray_Descr *dtype) +{ + PyObject *sub, *s; + + if (PyDataType_HASFIELDS(dtype)) { + return arraydescr_struct_repr(dtype); + } + else { + s = PyUString_FromString("dtype("); + sub = arraydescr_str(dtype); + if (sub == NULL) { + return NULL; + } + if (!PyDataType_HASSUBARRAY(dtype)) { + PyObject *t=PyUString_FromString("'"); + PyUString_Concat(&sub, t); + PyUString_ConcatAndDel(&t, sub); + sub = t; + } + PyUString_ConcatAndDel(&s, sub); + sub = PyUString_FromString(")"); + PyUString_ConcatAndDel(&s, sub); + return s; + } +} + static PyObject * arraydescr_richcompare(PyArray_Descr *self, PyObject *other, int cmp_op) { @@ -2611,7 +3202,7 @@ descr_length(PyObject *self0) { PyArray_Descr *self = (PyArray_Descr *)self0; - if (self->names) { + if (PyDataType_HASFIELDS(self)) { return PyTuple_GET_SIZE(self->names); } else { @@ -2642,7 +3233,7 @@ descr_subscript(PyArray_Descr *self, PyObject *op) { PyObject *retval; - if (!self->names) { + if (!PyDataType_HASFIELDS(self)) { PyObject *astr = arraydescr_str(self); #if defined(NPY_PY3K) PyObject *bstr = PyUnicode_AsUnicodeEscapeString(astr); diff --git a/numpy/core/src/multiarray/descriptor.h b/numpy/core/src/multiarray/descriptor.h index acb80eec6954..4f8a90582b3e 100644 --- a/numpy/core/src/multiarray/descriptor.h +++ b/numpy/core/src/multiarray/descriptor.h @@ -10,6 +10,18 @@ array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args); NPY_NO_EXPORT PyArray_Descr * _arraydescr_fromobj(PyObject *obj); +/* + * This creates a shorter repr using the 'kind' and 'itemsize', + * instead of the longer type name. It also creates the input + * for constructing a dtype rather than the full dtype function + * call. + * + * This does not preserve the 'align=True' parameter + * for structured arrays like the regular repr does. 
+ */ +NPY_NO_EXPORT PyObject * +arraydescr_short_construction_repr(PyArray_Descr *dtype); + #ifdef NPY_ENABLE_SEPARATE_COMPILATION extern NPY_NO_EXPORT char *_datetime_strings[]; #endif diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index fa5573ad54f6..02990f19350d 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1974,7 +1974,7 @@ get_subarray_transfer_function(int aligned, npy_intp src_size = 1, dst_size = 1; /* Get the subarray shapes and sizes */ - if (src_dtype->subarray != NULL) { + if (PyDataType_HASSUBARRAY(src_dtype)) { if (!(PyArray_IntpConverter(src_dtype->subarray->shape, &src_shape))) { PyErr_SetString(PyExc_ValueError, @@ -1984,7 +1984,7 @@ get_subarray_transfer_function(int aligned, src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len); src_dtype = src_dtype->subarray->base; } - if (dst_dtype->subarray != NULL) { + if (PyDataType_HASSUBARRAY(dst_dtype)) { if (!(PyArray_IntpConverter(dst_dtype->subarray->shape, &dst_shape))) { if (src_shape.ptr != NULL) { @@ -2848,7 +2848,7 @@ get_setdstzero_transfer_function(int aligned, *out_transferdata = NULL; } /* If there are subarrays, need to wrap it */ - else if (dst_dtype->subarray != NULL) { + else if (PyDataType_HASSUBARRAY(dst_dtype)) { PyArray_Dims dst_shape = {NULL, -1}; npy_intp dst_size = 1; PyArray_StridedTransferFn *contig_stransfer; @@ -2961,7 +2961,7 @@ get_decsrcref_transfer_function(int aligned, return NPY_SUCCEED; } /* If there are subarrays, need to wrap it */ - else if (src_dtype->subarray != NULL) { + else if (PyDataType_HASSUBARRAY(src_dtype)) { PyArray_Dims src_shape = {NULL, -1}; npy_intp src_size = 1; PyArray_StridedTransferFn *stransfer; @@ -3114,7 +3114,8 @@ PyArray_GetDTypeTransferFunction(int aligned, if (src_itemsize == dst_itemsize && src_dtype->kind == dst_dtype->kind && !PyDataType_HASFIELDS(src_dtype) && !PyDataType_HASFIELDS(dst_dtype) && - 
src_dtype->subarray == NULL && dst_dtype->subarray == NULL && + !PyDataType_HASSUBARRAY(src_dtype) && + !PyDataType_HASSUBARRAY(dst_dtype) && src_type_num != NPY_DATETIME && src_type_num != NPY_TIMEDELTA) { /* A custom data type requires that we use its copy/swap */ if (src_type_num >= NPY_NTYPES || dst_type_num >= NPY_NTYPES) { @@ -3193,7 +3194,8 @@ PyArray_GetDTypeTransferFunction(int aligned, } /* Handle subarrays */ - if (src_dtype->subarray != NULL || dst_dtype->subarray != NULL) { + if (PyDataType_HASSUBARRAY(src_dtype) || + PyDataType_HASSUBARRAY(dst_dtype)) { return get_subarray_transfer_function(aligned, src_stride, dst_stride, src_dtype, dst_dtype, diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 3d2b6975eb11..e039ed6119bf 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -438,7 +438,7 @@ array_descr_set(PyArrayObject *self, PyObject *arg) if ((newtype->elsize != self->descr->elsize) && (self->nd == 0 || !PyArray_ISONESEGMENT(self) || - newtype->subarray)) { + PyDataType_HASSUBARRAY(newtype))) { goto fail; } if (PyArray_ISCONTIGUOUS(self)) { @@ -474,7 +474,7 @@ array_descr_set(PyArrayObject *self, PyObject *arg) /* fall through -- adjust type*/ Py_DECREF(self->descr); - if (newtype->subarray) { + if (PyDataType_HASSUBARRAY(newtype)) { /* * create new array object from data and update * dimensions, strides and descr from it @@ -550,7 +550,7 @@ array_struct_get(PyArrayObject *self) inter->strides = NULL; } inter->data = self->data; - if (self->descr->names) { + if (PyDataType_HASFIELDS(self->descr)) { inter->descr = arraydescr_protocol_descr_get(self->descr); if (inter->descr == NULL) { PyErr_Clear(); diff --git a/numpy/core/src/multiarray/hashdescr.c b/numpy/core/src/multiarray/hashdescr.c index 60a9a7361a13..d4dbdf7e8126 100644 --- a/numpy/core/src/multiarray/hashdescr.c +++ b/numpy/core/src/multiarray/hashdescr.c @@ -55,7 +55,7 @@ static int 
_is_array_descr_builtin(PyArray_Descr* descr) if (descr->fields != NULL && descr->fields != Py_None) { return 0; } - if (descr->subarray != NULL) { + if (PyDataType_HASSUBARRAY(descr)) { return 0; } return 1; @@ -223,7 +223,7 @@ static int _array_descr_walk(PyArray_Descr* descr, PyObject *l) return -1; } } - if(descr->subarray != NULL) { + if(PyDataType_HASSUBARRAY(descr)) { st = _array_descr_walk_subarray(descr->subarray, l); if (st) { return -1; diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 31472333de45..3f1d2957e552 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -545,7 +545,7 @@ array_subscript(PyArrayObject *self, PyObject *op) if (PyString_Check(op) || PyUnicode_Check(op)) { PyObject *temp; - if (self->descr->names) { + if (PyDataType_HASFIELDS(self->descr)) { obj = PyDict_GetItem(self->descr->fields, op); if (obj != NULL) { PyArray_Descr *descr; @@ -573,7 +573,9 @@ array_subscript(PyArrayObject *self, PyObject *op) } /* Check for multiple field access */ - if (self->descr->names && PySequence_Check(op) && !PyTuple_Check(op)) { + if (PyDataType_HASFIELDS(self->descr) && + PySequence_Check(op) && + !PyTuple_Check(op)) { int seqlen, i; seqlen = PySequence_Size(op); for (i = 0; i < seqlen; i++) { @@ -797,7 +799,7 @@ array_ass_sub(PyArrayObject *self, PyObject *index, PyObject *op) } if (PyString_Check(index) || PyUnicode_Check(index)) { - if (self->descr->names) { + if (PyDataType_HASFIELDS(self->descr)) { PyObject *obj; obj = PyDict_GetItem(self->descr->fields, index); diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 9502bc9f04ad..08a82c2e8d09 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -1144,7 +1144,7 @@ array_sort(PyArrayObject *self, PyObject *args, PyObject *kwds) PyObject *new_name; PyObject *_numpy_internal; saved = self->descr; - if (saved->names == NULL) { + if 
(!PyDataType_HASFIELDS(saved)) { PyErr_SetString(PyExc_ValueError, "Cannot specify " \ "order when the array has no fields."); return NULL; @@ -1198,7 +1198,7 @@ array_argsort(PyArrayObject *self, PyObject *args, PyObject *kwds) PyObject *new_name; PyObject *_numpy_internal; saved = self->descr; - if (saved->names == NULL) { + if (!PyDataType_HASFIELDS(saved)) { PyErr_SetString(PyExc_ValueError, "Cannot specify " \ "order when the array has no fields."); return NULL; diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index d4ccc42c8262..4607c29042d1 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -757,7 +757,7 @@ PyArray_Scalar(void *data, PyArray_Descr *descr, PyObject *base) Py_SIZE(vobj) = itemsize; vobj->flags = BEHAVED | OWNDATA; swap = 0; - if (descr->names) { + if (PyDataType_HASFIELDS(descr)) { if (base) { Py_INCREF(base); vobj->base = base; diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 380ec3493d7d..a0c08dcbf9b7 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -1955,7 +1955,7 @@ static PyMethodDef @name@type_methods[] = { static Py_ssize_t voidtype_length(PyVoidScalarObject *self) { - if (!self->descr->names) { + if (!PyDataType_HASFIELDS(self->descr)) { return 0; } else { /* return the number of fields */ diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index c79b755bea23..70046cb72ea3 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -41,6 +41,25 @@ def test_invalid_types(self): #print typestr assert_raises(TypeError, np.dtype, typestr) + def test_bad_param(self): + # Can't give a size that's too small + assert_raises(ValueError, np.dtype, + {'names':['f0','f1'], + 'formats':['i4', 'i1'], + 'offsets':[0,4], + 'itemsize':4}) + # If alignment is enabled, the alignment (4) must divide 
the itemsize + assert_raises(ValueError, np.dtype, + {'names':['f0','f1'], + 'formats':['i4', 'i1'], + 'offsets':[0,4], + 'itemsize':9}, align=True) + # If alignment is enabled, the individual fields must be aligned + assert_raises(ValueError, np.dtype, + {'names':['f0','f1'], + 'formats':['i1','f4'], + 'offsets':[0,2]}, align=True) + class TestRecord(TestCase): def test_equivalent_record(self): """Test whether equivalent record dtypes hash the same.""" @@ -56,10 +75,12 @@ def test_different_names(self): def test_different_titles(self): # In theory, they may hash the same (collision) ? - a = np.dtype({'names': ['r','b'], 'formats': ['u1', 'u1'], - 'titles': ['Red pixel', 'Blue pixel']}) - b = np.dtype({'names': ['r','b'], 'formats': ['u1', 'u1'], - 'titles': ['RRed pixel', 'Blue pixel']}) + a = np.dtype({'names': ['r','b'], + 'formats': ['u1', 'u1'], + 'titles': ['Red pixel', 'Blue pixel']}) + b = np.dtype({'names': ['r','b'], + 'formats': ['u1', 'u1'], + 'titles': ['RRed pixel', 'Blue pixel']}) assert_dtype_not_equal(a, b) def test_not_lists(self): @@ -71,6 +92,99 @@ def test_not_lists(self): self.assertRaises(TypeError, np.dtype, dict(names=['A', 'B'], formats=set(['f8', 'i4']))) + def test_aligned_size(self): + # Check that structured dtypes get padded to an aligned size + dt = np.dtype('i4, i1', align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype([('f0', 'i4'), ('f1', 'i1')], align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype({'names':['f0','f1'], + 'formats':['i4', 'u1'], + 'offsets':[0,4]}, align=True) + assert_equal(dt.itemsize, 8) + dt = np.dtype({'f0': ('i4', 0), 'f1':('u1', 4)}, align=True) + assert_equal(dt.itemsize, 8) + # Nesting should preserve that alignment + dt1 = np.dtype([('f0', 'i4'), + ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]), + ('f2', 'i1')], align=True) + assert_equal(dt1.itemsize, 20) + dt2 = np.dtype({'names':['f0','f1','f2'], + 'formats':['i4', + [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], + 'i1'], + 'offsets':[0, 
4, 16]}, align=True) + assert_equal(dt2.itemsize, 20) + dt3 = np.dtype({'f0': ('i4', 0), + 'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4), + 'f2': ('i1', 16)}, align=True) + assert_equal(dt3.itemsize, 20) + assert_equal(dt1, dt2) + assert_equal(dt2, dt3) + # Nesting should preserve packing + dt1 = np.dtype([('f0', 'i4'), + ('f1', [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')]), + ('f2', 'i1')], align=False) + assert_equal(dt1.itemsize, 11) + dt2 = np.dtype({'names':['f0','f1','f2'], + 'formats':['i4', + [('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], + 'i1'], + 'offsets':[0, 4, 10]}, align=False) + assert_equal(dt2.itemsize, 11) + dt3 = np.dtype({'f0': ('i4', 0), + 'f1': ([('f1', 'i1'), ('f2', 'i4'), ('f3', 'i1')], 4), + 'f2': ('i1', 10)}, align=False) + assert_equal(dt3.itemsize, 11) + assert_equal(dt1, dt2) + assert_equal(dt2, dt3) + + def test_union_struct(self): + # Should be able to create union dtypes + dt = np.dtype({'names':['f0','f1','f2'], 'formats':['f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(str(dt), + "[('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])]") + + dt = np.dtype({'names': ['r','g','b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}) + assert_equal(str(dt), + "[(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')]") + + dt = np.dtype({'names': ['r','b'], 'formats': ['u1', 'u1'], + 'offsets': [0, 2], + 'titles': ['Red pixel', 'Blue pixel']}) + assert_equal(str(dt), + "{'names':['r','b'], " + "'formats':['u1','u1'], " + "'offsets':[0,2], " + "'titles':['Red pixel','Blue pixel']}") + + def test_complex_dtype_repr(self): + dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), + ('rtile', '>f4', (64, 
36))], (3,)), + ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), + ('bright', '>f4', (8, 36))])]) + assert_equal(repr(dt), + "dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), " + "('rtile', '>f4', (64, 36))], (3,)), " + "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " + "('bright', '>f4', (8, 36))])])") + + dt = np.dtype({'names': ['r','g','b'], 'formats': ['u1', 'u1', 'u1'], + 'offsets': [0, 1, 2], + 'titles': ['Red pixel', 'Green pixel', 'Blue pixel']}, + align=True) + assert_equal(repr(dt), + "dtype([(('Red pixel', 'r'), 'u1'), " + "(('Green pixel', 'g'), 'u1'), " + "(('Blue pixel', 'b'), 'u1')], align=True)") + + dt = np.dtype({'names': ['r','b'], 'formats': ['u1', 'u1'], + 'offsets': [0, 2], + 'titles': ['Red pixel', 'Blue pixel'], + 'itemsize': 4}) + assert_equal(repr(dt), + "dtype({'names':['r','b'], " + "'formats':['u1','u1'], " + "'offsets':[0,2], " + "'titles':['Red pixel','Blue pixel'], " + "'itemsize':4})") + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index a4cc2d72573a..1c43189bb330 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -853,17 +853,6 @@ def test_floats_from_string(self, level=rlevel): assert_almost_equal(fdouble, 1.234) assert_almost_equal(flongdouble, 1.234) - def test_complex_dtype_printing(self, level=rlevel): - dt = np.dtype([('top', [('tiles', ('>f4', (64, 64)), (1,)), - ('rtile', '>f4', (64, 36))], (3,)), - ('bottom', [('bleft', ('>f4', (8, 64)), (1,)), - ('bright', '>f4', (8, 36))])]) - assert_equal(str(dt), - "[('top', [('tiles', ('>f4', (64, 64)), (1,)), " - "('rtile', '>f4', (64, 36))], (3,)), " - "('bottom', [('bleft', ('>f4', (8, 64)), (1,)), " - "('bright', '>f4', (8, 36))])]") - def test_nonnative_endian_fill(self, level=rlevel): """ Non-native endian arrays were incorrectly filled with scalars before r5034.