From f60797ba64ccf33597225d23b893b6eb11149860 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Sun, 30 Jan 2011 23:26:43 -0800 Subject: [PATCH 1/3] BUG: perf: Operations like "a[10:20] += 10" were doing a redundant copy --- numpy/core/src/multiarray/ctors.c | 42 ++++++++++++++++++-- numpy/core/src/multiarray/new_iterator.c.src | 5 +-- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index e0115db8b61a..7ebc83507cce 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -447,7 +447,8 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems, } /* Gets a half-open range [start, end) which contains the array data */ -void _get_memory_extents(PyArrayObject *arr, +NPY_NO_EXPORT void +_get_array_memory_extents(PyArrayObject *arr, npy_uintp *out_start, npy_uintp *out_end) { npy_uintp start, end; @@ -481,12 +482,13 @@ void _get_memory_extents(PyArrayObject *arr, } /* Returns 1 if the arrays have overlapping data, 0 otherwise */ -int _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) +NPY_NO_EXPORT int +_arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) { npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0; - _get_memory_extents(arr1, &start1, &end1); - _get_memory_extents(arr2, &start2, &end2); + _get_array_memory_extents(arr1, &start1, &end1); + _get_array_memory_extents(arr2, &start2, &end2); return (start1 < end2) && (start2 < end1); } @@ -506,6 +508,38 @@ int _arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) NPY_NO_EXPORT int PyArray_MoveInto(PyArrayObject *dst, PyArrayObject *src) { + /* + * Performance fix for expresions like "a[1000:6000] += x". In this + * case, first an in-place add is done, followed by an assignment, + * equivalently expressed like this: + * + * tmp = a[1000:6000] # Calls array_subscript_nice in mapping.c + * np.add(tmp, x, tmp) + * a[1000:6000] = tmp # Calls array_ass_sub in mapping.c + * + * In the assignment the underlying data type, shape, strides, and + * data pointers are identical, but src != dst because they are separately + * generated slices. By detecting this and skipping the redundant + * copy of values to themselves, we potentially give a big speed boost. + * + * Note that we don't call EquivTypes, because usually the exact same + * dtype object will appear, and we don't want to slow things down + * with a complicated comparison. The comparisons are ordered to + * try and reject this with as little work as possible. + */ + if (PyArray_DATA(src) == PyArray_DATA(dst) && + PyArray_DESCR(src) == PyArray_DESCR(dst) && + PyArray_NDIM(src) == PyArray_NDIM(dst) && + PyArray_CompareLists(PyArray_DIMS(src), + PyArray_DIMS(dst), + PyArray_NDIM(src)) && + PyArray_CompareLists(PyArray_STRIDES(src), + PyArray_STRIDES(dst), + PyArray_NDIM(src))) { + /*printf("Redundant copy operation detected\n");*/ + return 0; + } + /* * A special case is when there is just one dimension with positive * strides, and we pass that to CopyInto, which correctly handles diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 4d031efbd2dc..49d8ad8b5bc8 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -4632,10 +4632,7 @@ npyiter_allocate_arrays(NpyIter *iter, npyiter_replace_axisdata(iter, iiter, op[iiter], ondim, PyArray_DATA(op[iiter]), op_axes ? 
op_axes[iiter] : NULL); - /* - * The temporary copy is aligned and needs no cast, and - * has constant stride 0 so never needs buffering - */ + /* The temporary copy is aligned and needs no cast */ op_itflags[iiter] |= NPY_OP_ITFLAG_ALIGNED; op_itflags[iiter] &= ~NPY_OP_ITFLAG_CAST; } From fcc6cc73ddcb1fc85446ba9256ac24ecdda6c6d8 Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 31 Jan 2011 19:42:19 -0800 Subject: [PATCH 2/3] ENH: core: Small tweaks to streamline things a bit --- numpy/core/src/multiarray/convert_datatype.c | 87 +++++---- numpy/core/src/multiarray/ctors.c | 175 +++++++++---------- numpy/core/src/multiarray/mapping.c | 2 +- 3 files changed, 129 insertions(+), 135 deletions(-) diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 27481bafc062..87b4fe7f8acf 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -486,7 +486,9 @@ promote_types(PyArray_Descr *type1, PyArray_Descr *type2, int is_small_unsigned1, int is_small_unsigned2) { if (is_small_unsigned1) { - int type_num1 = type1->type_num, type_num2 = type2->type_num, ret_type_num; + int type_num1 = type1->type_num; + int type_num2 = type2->type_num; + int ret_type_num; if (type_num2 < NPY_NTYPES && !(PyTypeNum_ISBOOL(type_num2) || PyTypeNum_ISUNSIGNED(type_num2))) { @@ -503,9 +505,9 @@ promote_types(PyArray_Descr *type1, PyArray_Descr *type2, return PyArray_PromoteTypes(type1, type2); } else if (is_small_unsigned2) { - int type_num1 = type1->type_num, - type_num2 = type2->type_num, - ret_type_num; + int type_num1 = type1->type_num; + int type_num2 = type2->type_num; + int ret_type_num; if (type_num1 < NPY_NTYPES && !(PyTypeNum_ISBOOL(type_num1) || PyTypeNum_ISUNSIGNED(type_num1))) { @@ -536,25 +538,6 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) { int type_num1, type_num2, ret_type_num; - /* If one of the arguments is NULL, return the non-NULL one */ - if (type1 == NULL || type2 == NULL) { - if (type1 == NULL) { - if (type2 == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "PromoteTypes received two NULL arguments"); - return NULL; - } - else { - Py_INCREF(type2); - return type2; - } - } - else { - Py_INCREF(type1); - return type1; - } - } - type_num1 = type1->type_num; type_num2 = type2->type_num; @@ -1146,9 +1129,12 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, Py_INCREF(ret); } else { - tmpret = PyArray_PromoteTypes(tmp, ret); - Py_DECREF(ret); - ret = tmpret; + /* Only call promote if the types aren't the same dtype */ + if (tmp != ret || !PyArray_ISNBO(ret->byteorder)) { + tmpret = PyArray_PromoteTypes(tmp, ret); + Py_DECREF(ret); + ret = tmpret; + } } } } @@ -1163,7 +1149,8 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, * unsigned integer which would fit an a signed integer * of the same size, something not exposed in the public API. 
*/ - if (PyArray_NDIM(arr[i]) == 0 && PyTypeNum_ISNUMBER(tmp->type_num)) { + if (PyArray_NDIM(arr[i]) == 0 && + PyTypeNum_ISNUMBER(tmp->type_num)) { char *data = PyArray_BYTES(arr[i]); int swap = !PyArray_ISNBO(tmp->byteorder); int type_num; @@ -1199,18 +1186,24 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, printf(" (%d) ", ret_is_small_unsigned); printf("\n"); #endif - tmpret = promote_types(tmp, ret, tmp_is_small_unsigned, - ret_is_small_unsigned); - if (tmpret == NULL) { + /* If they point to the same type, don't call promote */ + if (tmp == ret && PyArray_ISNBO(tmp->byteorder)) { + Py_DECREF(tmp); + } + else { + tmpret = promote_types(tmp, ret, tmp_is_small_unsigned, + ret_is_small_unsigned); + if (tmpret == NULL) { + Py_DECREF(tmp); + Py_DECREF(ret); + return NULL; + } Py_DECREF(tmp); Py_DECREF(ret); - return NULL; + ret = tmpret; } ret_is_small_unsigned = tmp_is_small_unsigned && ret_is_small_unsigned; - Py_DECREF(tmp); - Py_DECREF(ret); - ret = tmpret; } } @@ -1222,19 +1215,23 @@ PyArray_ResultType(npy_intp narrs, PyArrayObject **arr, Py_INCREF(ret); } else { - if (ret_is_small_unsigned) { - tmpret = promote_types(tmp, ret, 0, ret_is_small_unsigned); - if (tmpret == NULL) { - Py_DECREF(tmp); - Py_DECREF(ret); - return NULL; + /* Only call promote if the types aren't the same dtype */ + if (tmp != ret || !PyArray_ISNBO(tmp->byteorder)) { + if (ret_is_small_unsigned) { + tmpret = promote_types(tmp, ret, 0, + ret_is_small_unsigned); + if (tmpret == NULL) { + Py_DECREF(tmp); + Py_DECREF(ret); + return NULL; + } } + else { + tmpret = PyArray_PromoteTypes(tmp, ret); + } + Py_DECREF(ret); + ret = tmpret; } - else { - tmpret = PyArray_PromoteTypes(tmp, ret); - } - Py_DECREF(ret); - ret = tmpret; } } } diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 7ebc83507cce..b433f81ba1ab 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1054,11 +1054,11 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, if (descr->subarray) { PyObject *ret; - npy_intp newdims[2*MAX_DIMS]; + npy_intp newdims[2*NPY_MAXDIMS]; npy_intp *newstrides = NULL; memcpy(newdims, dims, nd*sizeof(npy_intp)); if (strides) { - newstrides = newdims + MAX_DIMS; + newstrides = newdims + NPY_MAXDIMS; memcpy(newstrides, strides, nd*sizeof(npy_intp)); } nd =_update_descr_and_dimensions(&descr, newdims, @@ -1068,15 +1068,11 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, data, flags, obj); return ret; } - if (nd < 0) { - PyErr_SetString(PyExc_ValueError, - "number of dimensions must be >=0"); - Py_DECREF(descr); - return NULL; - } - if (nd > MAX_DIMS) { + + if ((unsigned int)nd > (unsigned int)NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, - "maximum number of dimensions is %d", MAX_DIMS); + "number of dimensions must be within [0, %d]", + NPY_MAXDIMS); Py_DECREF(descr); return NULL; } @@ -1092,12 +1088,11 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, } PyArray_DESCR_REPLACE(descr); if (descr->type_num == NPY_STRING) { - descr->elsize = 1; + sd = descr->elsize = 1; } else { - descr->elsize = sizeof(PyArray_UCS4); + sd = descr->elsize = sizeof(PyArray_UCS4); } - sd = descr->elsize; } largest = NPY_MAX_INTP / sd; @@ -1111,6 +1106,7 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, */ continue; } + if (dim < 0) { PyErr_SetString(PyExc_ValueError, "negative dimensions " \ @@ -1118,14 +1114,15 @@ PyArray_NewFromDescr(PyTypeObject *subtype, 
PyArray_Descr *descr, int nd, Py_DECREF(descr); return NULL; } - if (dim > largest) { + + size *= dim; + + if (size > largest) { PyErr_SetString(PyExc_ValueError, "array is too big."); Py_DECREF(descr); return NULL; } - size *= dim; - largest /= dim; } self = (PyArrayObject *) subtype->tp_alloc(subtype, 0); @@ -1139,15 +1136,15 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, if (data == NULL) { self->flags = DEFAULT; if (flags) { - self->flags |= FORTRAN; + self->flags |= NPY_F_CONTIGUOUS; if (nd > 1) { - self->flags &= ~CONTIGUOUS; + self->flags &= ~NPY_C_CONTIGUOUS; } - flags = FORTRAN; + flags = NPY_F_CONTIGUOUS; } } else { - self->flags = (flags & ~UPDATEIFCOPY); + self->flags = (flags & ~NPY_UPDATEIFCOPY); } self->descr = descr; self->base = (PyObject *)NULL; @@ -1176,7 +1173,7 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, } else { self->dimensions = self->strides = NULL; - self->flags |= FORTRAN; + self->flags |= NPY_F_CONTIGUOUS; } if (data == NULL) { @@ -1189,7 +1186,8 @@ PyArray_NewFromDescr(PyTypeObject *subtype, PyArray_Descr *descr, int nd, if (sd == 0) { sd = descr->elsize; } - if ((data = PyDataMem_NEW(sd)) == NULL) { + data = PyDataMem_NEW(sd); + if (data == NULL) { PyErr_NoMemory(); goto fail; } @@ -1481,7 +1479,7 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) /*NUMPY_API - * Does not check for ENSURECOPY and NOTSWAPPED in flags + * Does not check for NPY_ENSURECOPY and NPY_NOTSWAPPED in flags * Steals a reference to newtype --- which can be NULL */ NPY_NO_EXPORT PyObject * @@ -1505,14 +1503,14 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, r = PyArray_FromArray((PyArrayObject *)op, newtype, flags); } else if (PyArray_IsScalar(op, Generic)) { - if (flags & UPDATEIFCOPY) { + if (flags & NPY_UPDATEIFCOPY) { goto err; } r = PyArray_FromScalar(op, newtype); } else if (newtype == NULL && (newtype = _array_find_python_scalar_type(op))) { - if (flags & UPDATEIFCOPY) { + if (flags & NPY_UPDATEIFCOPY) { goto err; } r = Array_FromPyScalar(op, newtype); @@ -1542,7 +1540,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, else { int isobject = 0; - if (flags & UPDATEIFCOPY) { + if (flags & NPY_UPDATEIFCOPY) { goto err; } if (newtype == NULL) { @@ -1559,7 +1557,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, /* necessary but not sufficient */ Py_INCREF(newtype); - r = Array_FromSequence(op, newtype, flags & FORTRAN, + r = Array_FromSequence(op, newtype, flags & NPY_F_CONTIGUOUS, min_depth, max_depth); if (r == NULL && (thiserr=PyErr_Occurred())) { if (PyErr_GivenExceptionMatches(thiserr, @@ -1574,7 +1572,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, if (isobject) { Py_INCREF(newtype); r = ObjectArray_FromNestedList - (op, newtype, flags & FORTRAN); + (op, newtype, flags & NPY_F_CONTIGUOUS); seq = TRUE; Py_DECREF(newtype); } @@ -1626,16 +1624,16 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, /* * flags is any of - * CONTIGUOUS, - * FORTRAN, - * ALIGNED, - * WRITEABLE, - * NOTSWAPPED, - * ENSURECOPY, - * UPDATEIFCOPY, - * FORCECAST, - * ENSUREARRAY, - * ELEMENTSTRIDES + * NPY_C_CONTIGUOUS (CONTIGUOUS), + * NPY_F_CONTIGUOUS (FORTRAN), + * NPY_ALIGNED, + * NPY_WRITEABLE, + * NPY_NOTSWAPPED, + * NPY_ENSURECOPY, + * NPY_UPDATEIFCOPY, + * NPY_FORCECAST, + * NPY_ENSUREARRAY, + * NPY_ELEMENTSTRIDES * * or'd (|) together * @@ -1644,24 +1642,24 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int 
min_depth, * won't guarantee it -- it will depend on the object as to whether or * not it has such features. * - * Note that ENSURECOPY is enough - * to guarantee CONTIGUOUS, ALIGNED and WRITEABLE + * Note that NPY_ENSURECOPY is enough + * to guarantee NPY_C_CONTIGUOUS, NPY_ALIGNED and NPY_WRITEABLE * and therefore it is redundant to include those as well. * - * BEHAVED == ALIGNED | WRITEABLE - * CARRAY = CONTIGUOUS | BEHAVED - * FARRAY = FORTRAN | BEHAVED + * NPY_BEHAVED == NPY_ALIGNED | NPY_WRITEABLE + * NPY_CARRAY = NPY_C_CONTIGUOUS | NPY_BEHAVED + * NPY_FARRAY = NPY_F_CONTIGUOUS | NPY_BEHAVED * - * FORTRAN can be set in the FLAGS to request a FORTRAN array. + * NPY_F_CONTIGUOUS can be set in the FLAGS to request a FORTRAN array. * Fortran arrays are always behaved (aligned, * notswapped, and writeable) and not (C) CONTIGUOUS (if > 1d). * - * UPDATEIFCOPY flag sets this flag in the returned array if a copy is + * NPY_UPDATEIFCOPY flag sets this flag in the returned array if a copy is * made and the base argument points to the (possibly) misbehaved array. * When the new array is deallocated, the original array held in base * is updated with the contents of the new array. * - * FORCECAST will cause a cast to occur regardless of whether or not + * NPY_FORCECAST will cause a cast to occur regardless of whether or not * it is safe. */ @@ -1673,7 +1671,7 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth, int max_depth, int requires, PyObject *context) { PyObject *obj; - if (requires & NOTSWAPPED) { + if (requires & NPY_NOTSWAPPED) { if (!descr && PyArray_Check(op) && !PyArray_ISNBO(PyArray_DESCR(op)->byteorder)) { descr = PyArray_DescrNew(PyArray_DESCR(op)); @@ -1690,10 +1688,10 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth, if (obj == NULL) { return NULL; } - if ((requires & ELEMENTSTRIDES) && + if ((requires & NPY_ELEMENTSTRIDES) && !PyArray_ElementStrides(obj)) { PyObject *new; - new = PyArray_NewCopy((PyArrayObject *)obj, PyArray_ANYORDER); + new = PyArray_NewCopy((PyArrayObject *)obj, NPY_ANYORDER); Py_DECREF(obj); obj = new; } @@ -1731,10 +1729,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) } /* - * Can't cast unless ndim-0 array, FORCECAST is specified + * Can't cast unless ndim-0 array, NPY_FORCECAST is specified * or the cast is safe. 
*/ - if (!(flags & FORCECAST) && !PyArray_NDIM(arr) == 0 && + if (!(flags & NPY_FORCECAST) && !PyArray_NDIM(arr) == 0 && !PyArray_CanCastTo(oldtype, newtype)) { Py_DECREF(newtype); PyErr_SetString(PyExc_TypeError, @@ -1744,26 +1742,27 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) } /* Don't copy if sizes are compatible */ - if ((flags & ENSURECOPY) || PyArray_EquivTypes(oldtype, newtype)) { + if ((flags & NPY_ENSURECOPY) || PyArray_EquivTypes(oldtype, newtype)) { arrflags = arr->flags; - if (arr->nd <= 1 && (flags & FORTRAN)) { - flags |= CONTIGUOUS; + if (arr->nd <= 1 && (flags & NPY_F_CONTIGUOUS)) { + flags |= NPY_C_CONTIGUOUS; } - copy = (flags & ENSURECOPY) || - ((flags & CONTIGUOUS) && (!(arrflags & CONTIGUOUS))) - || ((flags & ALIGNED) && (!(arrflags & ALIGNED))) + copy = (flags & NPY_ENSURECOPY) || + ((flags & NPY_C_CONTIGUOUS) && (!(arrflags & NPY_C_CONTIGUOUS))) + || ((flags & NPY_ALIGNED) && (!(arrflags & NPY_ALIGNED))) || (arr->nd > 1 && - ((flags & FORTRAN) && (!(arrflags & FORTRAN)))) - || ((flags & WRITEABLE) && (!(arrflags & WRITEABLE))); + ((flags & NPY_F_CONTIGUOUS) && + (!(arrflags & NPY_F_CONTIGUOUS)))) + || ((flags & NPY_WRITEABLE) && (!(arrflags & NPY_WRITEABLE))); if (copy) { - if ((flags & UPDATEIFCOPY) && + if ((flags & NPY_UPDATEIFCOPY) && (!PyArray_ISWRITEABLE(arr))) { Py_DECREF(newtype); PyErr_SetString(PyExc_ValueError, msg); return NULL; } - if ((flags & ENSUREARRAY)) { + if ((flags & NPY_ENSUREARRAY)) { subtype = &PyArray_Type; } ret = (PyArrayObject *) @@ -1771,7 +1770,7 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) arr->nd, arr->dimensions, NULL, NULL, - flags & FORTRAN, + flags & NPY_F_CONTIGUOUS, (PyObject *)arr); if (ret == NULL) { return NULL; @@ -1780,10 +1779,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) Py_DECREF(ret); return NULL; } - if (flags & UPDATEIFCOPY) { - ret->flags |= UPDATEIFCOPY; + if (flags & NPY_UPDATEIFCOPY) { + ret->flags |= NPY_UPDATEIFCOPY; ret->base = (PyObject *)arr; - PyArray_FLAGS(ret->base) &= ~WRITEABLE; + PyArray_FLAGS(ret->base) &= ~NPY_WRITEABLE; Py_INCREF(arr); } } @@ -1793,7 +1792,7 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) */ else { Py_DECREF(newtype); - if ((flags & ENSUREARRAY) && + if ((flags & NPY_ENSUREARRAY) && !PyArray_CheckExact(arr)) { Py_INCREF(arr->descr); ret = (PyArrayObject *) @@ -1821,20 +1820,20 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) * array type and copy was not specified */ else { - if ((flags & UPDATEIFCOPY) && + if ((flags & NPY_UPDATEIFCOPY) && (!PyArray_ISWRITEABLE(arr))) { Py_DECREF(newtype); PyErr_SetString(PyExc_ValueError, msg); return NULL; } - if ((flags & ENSUREARRAY)) { + if ((flags & NPY_ENSUREARRAY)) { subtype = &PyArray_Type; } ret = (PyArrayObject *) PyArray_NewFromDescr(subtype, newtype, arr->nd, arr->dimensions, NULL, NULL, - flags & FORTRAN, + flags & NPY_F_CONTIGUOUS, (PyObject *)arr); if (ret == NULL) { return NULL; @@ -1843,10 +1842,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) Py_DECREF(ret); return NULL; } - if (flags & UPDATEIFCOPY) { - ret->flags |= UPDATEIFCOPY; + if (flags & NPY_UPDATEIFCOPY) { + ret->flags |= NPY_UPDATEIFCOPY; ret->base = (PyObject *)arr; - PyArray_FLAGS(ret->base) &= ~WRITEABLE; + PyArray_FLAGS(ret->base) &= ~NPY_WRITEABLE; Py_INCREF(arr); } } @@ -1875,9 +1874,9 @@ PyArray_FromStructInterface(PyObject *input) if (inter->two != 2) { goto fail; } - if ((inter->flags 
& NOTSWAPPED) != NOTSWAPPED) { + if ((inter->flags & NPY_NOTSWAPPED) != NPY_NOTSWAPPED) { endian = PyArray_OPPBYTE; - inter->flags &= ~NOTSWAPPED; + inter->flags &= ~NPY_NOTSWAPPED; } if (inter->flags & ARR_HAS_DESCR) { @@ -1972,7 +1971,7 @@ PyArray_FromInterface(PyObject *input) if (res < 0) { goto fail; } - dataflags &= ~WRITEABLE; + dataflags &= ~NPY_WRITEABLE; } attr = PyDict_GetItemString(inter, "offset"); if (attr) { @@ -2017,7 +2016,7 @@ PyArray_FromInterface(PyObject *input) goto fail; } if (PyObject_IsTrue(PyTuple_GET_ITEM(attr,1))) { - dataflags &= ~WRITEABLE; + dataflags &= ~NPY_WRITEABLE; } } attr = tstr; @@ -2247,7 +2246,7 @@ PyArray_EnsureArray(PyObject *op) new = PyArray_FromScalar(op, NULL); } else { - new = PyArray_FromAny(op, NULL, 0, 0, ENSUREARRAY, NULL); + new = PyArray_FromAny(op, NULL, 0, 0, NPY_ENSUREARRAY, NULL); } Py_XDECREF(op); return new; @@ -3340,11 +3339,11 @@ PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type, } if (!write) { - ret->flags &= ~WRITEABLE; + ret->flags &= ~NPY_WRITEABLE; } /* Store a reference for decref on deallocation */ ret->base = buf; - PyArray_UpdateFlags(ret, ALIGNED); + PyArray_UpdateFlags(ret, NPY_ALIGNED); return (PyObject *)ret; } @@ -3561,9 +3560,9 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) * * If data is given, then flags is flags associated with data. * If strides is not given, then a contiguous strides array will be created - * and the CONTIGUOUS bit will be set. If the flags argument - * has the FORTRAN bit set, then a FORTRAN-style strides array will be - * created (and of course the FORTRAN flag bit will be set). + * and the NPY_C_CONTIGUOUS bit will be set. If the flags argument + * has the NPY_F_CONTIGUOUS bit set, then a FORTRAN-style strides array will be + * created (and of course the NPY_F_CONTIGUOUS flag bit will be set). * * If data is not given but created here, then flags will be DEFAULT * and a non-zero flags argument can be used to indicate a FORTRAN style @@ -3576,17 +3575,16 @@ _array_fill_strides(npy_intp *strides, npy_intp *dims, int nd, size_t itemsize, { int i; /* Only make Fortran strides if not contiguous as well */ - if ((inflag & FORTRAN) && !(inflag & CONTIGUOUS)) { + if ((inflag & (NPY_F_CONTIGUOUS|NPY_C_CONTIGUOUS)) == NPY_F_CONTIGUOUS) { for (i = 0; i < nd; i++) { strides[i] = itemsize; itemsize *= dims[i] ? dims[i] : 1; } - *objflags |= FORTRAN; if (nd > 1) { - *objflags &= ~CONTIGUOUS; + *objflags = ((*objflags)|NPY_F_CONTIGUOUS) & ~NPY_C_CONTIGUOUS; } else { - *objflags |= CONTIGUOUS; + *objflags |= (NPY_F_CONTIGUOUS|NPY_C_CONTIGUOUS); } } else { @@ -3594,12 +3592,11 @@ _array_fill_strides(npy_intp *strides, npy_intp *dims, int nd, size_t itemsize, strides[i] = itemsize; itemsize *= dims[i] ? dims[i] : 1; } - *objflags |= CONTIGUOUS; if (nd > 1) { - *objflags &= ~FORTRAN; + *objflags = ((*objflags)|NPY_C_CONTIGUOUS) & ~NPY_F_CONTIGUOUS; } else { - *objflags |= FORTRAN; + *objflags |= (NPY_C_CONTIGUOUS|NPY_F_CONTIGUOUS); } } return itemsize; diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index a3d704ac77d9..8db85bf8f0b5 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -824,7 +824,7 @@ array_ass_sub(PyArrayObject *self, PyObject *index, PyObject *op) * Doing "a[...] += 1" triggers assigning an array to itself, * so this check is needed. 
*/ - if (self == op) { + if ((PyObject *)self == op) { return 0; } else { From 85993f895e7676f477f80e940a2ee925b3a0c19c Mon Sep 17 00:00:00 2001 From: Mark Wiebe Date: Mon, 31 Jan 2011 19:40:25 -0800 Subject: [PATCH 3/3] ENH: iter: Add timing code, rewrite some sections to be faster/more clear --- numpy/core/src/multiarray/new_iterator.c.src | 889 ++++++++++--------- 1 file changed, 488 insertions(+), 401 deletions(-) diff --git a/numpy/core/src/multiarray/new_iterator.c.src b/numpy/core/src/multiarray/new_iterator.c.src index 49d8ad8b5bc8..caf8afb5ade3 100644 --- a/numpy/core/src/multiarray/new_iterator.c.src +++ b/numpy/core/src/multiarray/new_iterator.c.src @@ -18,6 +18,32 @@ #include "lowlevel_strided_loops.h" +/********** ITERATOR CONSTRUCTION TIMING **************/ +#define NPY_IT_CONSTRUCTION_TIMING 0 + +#if NPY_IT_CONSTRUCTION_TIMING +#define NPY_IT_TIME_POINT(var) { \ + unsigned int hi, lo; \ + __asm__ __volatile__ ( \ + "rdtsc" \ + : "=d" (hi), "=a" (lo)); \ + var = (((unsigned long long)hi) << 32) | lo; \ + } +#define NPY_IT_PRINT_TIME_START(var) { \ + printf("%30s: start\n", #var); \ + c_temp = var; \ + } +#define NPY_IT_PRINT_TIME_VAR(var) { \ + printf("%30s: %6.0f clocks\n", #var, \ + ((double)(var-c_temp))); \ + c_temp = var; \ + } +#else +#define NPY_IT_TIME_POINT(var) +#endif + +/******************************************************/ + /********** PRINTF DEBUG TRACING **************/ #define NPY_IT_DBG_TRACING 0 @@ -371,6 +397,28 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, double subtype_priority = NPY_PRIORITY; PyTypeObject *subtype = &PyArray_Type; +#if NPY_IT_CONSTRUCTION_TIMING + npy_intp c_temp, + c_start, + c_check_op_axes, + c_check_global_flags, + c_calculate_ndim, + c_malloc, + c_prepare_operands, + c_fill_axisdata, + c_compute_index_strides, + c_apply_forced_iteration_order, + c_find_best_axis_ordering, + c_get_priority_subtype, + c_find_output_common_dtype, + c_check_casting, + c_allocate_arrays, + c_coalesce_axes, + c_prepare_buffers; +#endif + + NPY_IT_TIME_POINT(c_start); + if (niter > NPY_MAXARGS) { PyErr_Format(PyExc_ValueError, "Cannot construct an iterator with more than %d operands " @@ -383,11 +431,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_op_axes); + /* Check the global iterator flags */ if (!npyiter_check_global_flags(flags, &itflags)) { return NULL; } + NPY_IT_TIME_POINT(c_check_global_flags); + /* Calculate how many dimensions the iterator should have */ ndim = npyiter_calculate_ndim(niter, op_in, oa_ndim); @@ -397,10 +449,14 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, ndim = 1; } + NPY_IT_TIME_POINT(c_calculate_ndim); + /* Allocate memory for the iterator */ iter = (NpyIter*) PyArray_malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, niter)); + NPY_IT_TIME_POINT(c_malloc); + /* Fill in the basic data */ NIT_ITFLAGS(iter) = itflags; NIT_NDIM(iter) = ndim; @@ -424,6 +480,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, /* Set resetindex to zero as well (it's just after the resetdataptr) */ op_dataptr[niter] = 0; + NPY_IT_TIME_POINT(c_prepare_operands); + /* * Initialize buffer data (must set the buffers and transferdata * to NULL before we might deallocate the iterator). 
@@ -443,6 +501,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_fill_axisdata); + if (itflags&NPY_ITFLAG_BUFFER) { /* * If buffering is enabled and no buffersize was given, use a default @@ -466,6 +526,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, */ npyiter_compute_index_strides(iter, flags); + NPY_IT_TIME_POINT(c_compute_index_strides); + /* Initialize the perm to the identity */ perm = NIT_PERM(iter); for(idim = 0; idim < ndim; ++idim) { @@ -478,6 +540,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, npyiter_apply_forced_iteration_order(iter, order); itflags = NIT_ITFLAGS(iter); + NPY_IT_TIME_POINT(c_apply_forced_iteration_order); + /* Set some flags for allocated outputs */ for (iiter = 0; iiter < niter; ++iiter) { if (op[iiter] == NULL) { @@ -515,11 +579,15 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, itflags = NIT_ITFLAGS(iter); } + NPY_IT_TIME_POINT(c_find_best_axis_ordering); + if (need_subtype) { npyiter_get_priority_subtype(niter, op, op_itflags, &subtype_priority, &subtype); } + NPY_IT_TIME_POINT(c_get_priority_subtype); + /* * If an automatically allocated output didn't have a specified * dtype, we need to figure it out now, before allocating the outputs. @@ -544,9 +612,11 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, NPY_IT_DBG_PRINTF("Iterator: Replacing all data types\n"); /* Replace all the data types */ for (iiter = 0; iiter < niter; ++iiter) { - Py_XDECREF(op_dtype[iiter]); - Py_INCREF(dtype); - op_dtype[iiter] = dtype; + if (op_dtype[iiter] != dtype) { + Py_XDECREF(op_dtype[iiter]); + Py_INCREF(dtype); + op_dtype[iiter] = dtype; + } } } else { @@ -562,6 +632,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, Py_DECREF(dtype); } + NPY_IT_TIME_POINT(c_find_output_common_dtype); + /* * All of the data types have been settled, so it's time * to check that data type conversions are following the @@ -572,6 +644,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_check_casting); + /* * At this point, the iteration order has been finalized. so * any allocation of ops that were NULL, or any temporary @@ -584,6 +658,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, return NULL; } + NPY_IT_TIME_POINT(c_allocate_arrays); + /* * Finally, if coords weren't requested, * it may be possible to coalesce some axes together. @@ -602,6 +678,8 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, op_dataptr = NIT_RESETDATAPTR(iter); } + NPY_IT_TIME_POINT(c_coalesce_axes); + /* * Now that the axes are finished, check whether we can apply * the single iteration optimization to the iternext function. 
@@ -657,6 +735,29 @@ NpyIter_MultiNew(npy_intp niter, PyArrayObject **op_in, npy_uint32 flags, } } + NPY_IT_TIME_POINT(c_prepare_buffers); + +#if NPY_IT_CONSTRUCTION_TIMING + printf("\nIterator construction timing:\n"); + NPY_IT_PRINT_TIME_START(c_start); + NPY_IT_PRINT_TIME_VAR(c_check_op_axes); + NPY_IT_PRINT_TIME_VAR(c_check_global_flags); + NPY_IT_PRINT_TIME_VAR(c_calculate_ndim); + NPY_IT_PRINT_TIME_VAR(c_malloc); + NPY_IT_PRINT_TIME_VAR(c_prepare_operands); + NPY_IT_PRINT_TIME_VAR(c_fill_axisdata); + NPY_IT_PRINT_TIME_VAR(c_compute_index_strides); + NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order); + NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering); + NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype); + NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype); + NPY_IT_PRINT_TIME_VAR(c_check_casting); + NPY_IT_PRINT_TIME_VAR(c_allocate_arrays); + NPY_IT_PRINT_TIME_VAR(c_coalesce_axes); + NPY_IT_PRINT_TIME_VAR(c_prepare_buffers); + printf("\n"); +#endif + return iter; } @@ -3242,297 +3343,200 @@ npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, char *op_itflags, npy_intp iiter, niter = NIT_NITER(iter); npy_intp ondim; - char *odataptr; - NpyIter_AxisData *axisdata0, *axisdata; + NpyIter_AxisData *axisdata; npy_intp sizeof_axisdata; - PyArrayObject **op = NIT_OPERANDS(iter); - - axisdata0 = NIT_AXISDATA(iter); - sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - - /* Process the first operand */ - if (op_axes == NULL || op_axes[0] == NULL) { - /* Default broadcasting rules if op_axes is not specified */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 0 : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - /* Possible if op_axes are being used, but op_axes[0] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "Iterator input has more dimensions than allowed " - "by the 'op_axes' specified"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[0] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[0], ondim-idim-1); - - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; - } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], ondim-idim-1); - } - NAD_PTRS(axisdata)[0] = odataptr; - - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; + PyArrayObject **op = NIT_OPERANDS(iter), *op_cur; + npy_intp broadcast_shape[NPY_MAXDIMS]; - NIT_ADVANCE_AXISDATA(axisdata, 1); - } + /* First broadcast the shapes together */ + for (idim = 0; idim < ndim; ++idim) { + broadcast_shape[idim] = 1; } - else { - npy_intp *axes = op_axes[0]; - - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[0] == NULL) ? 
ndim : PyArray_NDIM(op[0]); - odataptr = op_dataptr[0]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_SHAPE(axisdata) = 1; - NAD_COORD(axisdata) = 0; - NAD_STRIDES(axisdata)[0] = 0; - NAD_PTRS(axisdata)[0] = odataptr; - } - else if (i < ondim) { - npy_intp shape; - - if (op[0] != NULL) { - shape = PyArray_DIM(op[0], i); - } - else { - shape = 1; - } + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; + if (op_cur != NULL) { + npy_intp *shape = PyArray_DIMS(op_cur); + ondim = PyArray_NDIM(op_cur); - NAD_SHAPE(axisdata) = shape; - NAD_COORD(axisdata) = 0; - if (shape == 1) { - NAD_STRIDES(axisdata)[0] = 0; + if (op_axes == NULL || op_axes[iiter] == NULL) { + /* + * Possible if op_axes are being used, but + * op_axes[iiter] is NULL + */ + if (ondim > ndim) { + PyErr_SetString(PyExc_ValueError, + "input operand has more dimensions than allowed " + "by the axis remapping"); + return 0; } - else { - NAD_STRIDES(axisdata)[0] = PyArray_STRIDE(op[0], i); + for (idim = 0; idim < ondim; ++idim) { + npy_intp bshape = broadcast_shape[idim+ndim-ondim], + op_shape = shape[idim]; + if (bshape == 1) { + broadcast_shape[idim+ndim-ondim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } } - NAD_PTRS(axisdata)[0] = odataptr; } else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[0][%d] (==%d) is not a valid " - "axis of op[0], which has %d dimensions", - (int)(ndim-idim-1), (int)i, (int)ondim); - return 0; + npy_intp *axes = op_axes[iiter]; + for (idim = 0; idim < ndim; ++idim) { + npy_intp i = axes[idim]; + if (i >= 0) { + if (i < ondim) { + npy_intp bshape = broadcast_shape[idim], + op_shape = shape[i]; + if (bshape == 1) { + broadcast_shape[idim] = op_shape; + } + else if (bshape != op_shape && op_shape != 1) { + goto broadcast_error; + } + } + else { + PyErr_Format(PyExc_ValueError, + "Iterator input op_axes[%d][%d] (==%d) " + "is not a valid axis of op[%d], which " + "has %d dimensions ", + (int)iiter, (int)(ndim-idim-1), (int)i, + (int)iiter, (int)ondim); + return 0; + } + } + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } - /* - * Process the rest of the operands, using the broadcasting rules - * to combine them. - */ - for (iiter = 1; iiter < niter; ++iiter) { - if (op_axes == NULL || op_axes[iiter] == NULL) { - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? 
0 : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - /* Possible if op_axes are being used, but op_axes[iiter] is NULL */ - if (ondim > ndim) { - PyErr_SetString(PyExc_ValueError, - "input operand has more dimensions than allowed " - "by the axis remapping"); - return 0; - } - for (idim = 0; idim < ondim; ++idim) { - npy_intp shape; - - /* op[iiter] != NULL, because we set ondim to 0 in that case */ - shape = PyArray_DIM(op[iiter], ondim-idim-1); + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; - } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; - } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; - } - NAD_STRIDES(axisdata)[iiter] = PyArray_STRIDE( - op[iiter], ondim-idim-1); - } - NAD_PTRS(axisdata)[iiter] = odataptr; + /* Now process the operands, filling in the axisdata */ + for (idim = 0; idim < ndim; ++idim) { + npy_intp bshape = broadcast_shape[ndim-idim-1]; + npy_intp *strides = NAD_STRIDES(axisdata); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - for (idim = ondim; idim < ndim; ++idim) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + NAD_SHAPE(axisdata) = bshape; + NAD_COORD(axisdata) = 0; + memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*niter); - NIT_ADVANCE_AXISDATA(axisdata, 1); - } - } - else { - npy_intp *axes = op_axes[iiter]; + for (iiter = 0; iiter < niter; ++iiter) { + op_cur = op[iiter]; - /* Use op_axes to choose the axes */ - axisdata = axisdata0; - ondim = (op[iiter] == NULL) ? ndim : PyArray_NDIM(op[iiter]); - odataptr = op_dataptr[iiter]; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i = axes[ndim-idim-1]; - if (i < 0) { - NAD_STRIDES(axisdata)[iiter] = 0; - NAD_PTRS(axisdata)[iiter] = odataptr; + if (op_axes == NULL || op_axes[iiter] == NULL) { + if (op_cur == NULL) { + strides[iiter] = 0; } - else if (i < ondim) { - npy_intp shape; - - if (op[iiter] != NULL) { - shape = PyArray_DIM(op[iiter], i); - } - else { - shape = 1; - } - - if (shape == 1) { - NAD_STRIDES(axisdata)[iiter] = 0; + else { + ondim = PyArray_NDIM(op_cur); + if (bshape == 1) { + strides[iiter] = 0; + if (idim >= ondim && !output_scalars && + (op_flags[iiter]&NPY_ITER_NO_BROADCAST)) { + goto operand_different_than_broadcast; + } } - else { - if (NAD_SHAPE(axisdata) == 1) { - NAD_SHAPE(axisdata) = shape; + else if (idim >= ondim || + PyArray_DIM(op_cur, ondim-idim-1) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { + goto operand_different_than_broadcast; } - else if (NAD_SHAPE(axisdata) != shape) { - goto broadcast_error; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NAD_STRIDES(axisdata)[iiter] = - PyArray_STRIDE(op[iiter], i); } - NAD_PTRS(axisdata)[iiter] = odataptr; - } - else { - PyErr_Format(PyExc_ValueError, - "Iterator input op_axes[%d][%d] (==%d) is not a " - "valid axis of op[%d], which has %d dimensions ", - (int)iiter, (int)(ndim-idim-1), (int)i, - (int)iiter, 
(int)ondim); - return 0; + else { + strides[iiter] = PyArray_STRIDE(op_cur, ondim-idim-1); + } } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } - } - } - - /* Go through and check for operands for broadcasting and reduction */ - for (iiter = 0; iiter < niter; ++iiter) { - if (op[iiter] != NULL) { - /* - * If broadcasting is disallowed for this operand, - * unless scalars are output, which means all operands are scalar - * and no broadcasting errors could occur - */ - if ((op_flags[iiter]&NPY_ITER_NO_BROADCAST) && !output_scalars) { - npy_intp *axes; - - axes = op_axes ? op_axes[iiter] : NULL; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - npy_intp i; - if (axes) { - i = axes[ndim-idim-1]; - } - else { - i = ndim-idim-1; + else { + npy_intp *axes = op_axes[iiter]; + npy_intp i = axes[ndim-idim-1]; + if (i >= 0) { + if (bshape == 1 || op_cur == NULL) { + strides[iiter] = 0; } - if (i >= 0 && i < PyArray_NDIM(op[iiter])) { - if (PyArray_DIM(op[iiter], i) != NAD_SHAPE(axisdata)) { + else if (PyArray_DIM(op_cur, i) == 1) { + strides[iiter] = 0; + if (op_flags[iiter]&NPY_ITER_NO_BROADCAST) { goto operand_different_than_broadcast; } - } - /* - * Also disallow broadcasting by adding additional - * dimensions, unless that part of the broadcasting - * was done explicitly through op_axes. - */ - else if (!axes) { - goto operand_different_than_broadcast; - } - /* If its writeable, this may cause a reduction */ - else if ((op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) && - NAD_SHAPE(axisdata) > 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "reduction is not enabled"); - return 0; - } - if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { - PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " - "is flagged as write-only, not " - "read-write"); - return 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "reduction is not enabled"); + return 0; + } + if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output operand requires a reduction, but " + "is flagged as write-only, not " + "read-write"); + return 0; + } + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); + else { + strides[iiter] = PyArray_STRIDE(op_cur, i); + } } - } - /* Check whether this operand includes any reduction */ - else if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - /* - * If the stride is 0 and the shape is bigger than - * one, that's a reduction. 
- */ - if (NAD_SHAPE(axisdata) > 1 && - NAD_STRIDES(axisdata)[iiter] == 0) { + else if (bshape == 1) { + strides[iiter] = 0; + } + else { + strides[iiter] = 0; + /* If it's writeable, this means a reduction */ + if (op_itflags[iiter]&NPY_OP_ITFLAG_WRITE) { if (!(flags&NPY_ITER_REDUCE_OK)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "reduction is not enabled"); return 0; } if (!(op_itflags[iiter]&NPY_OP_ITFLAG_READ)) { PyErr_SetString(PyExc_ValueError, - "operand requires a reduction, but " + "output operand requires a reduction, but " "is flagged as write-only, not " "read-write"); return 0; } NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; op_itflags[iiter] |= NPY_OP_ITFLAG_REDUCE; - break; } - - NIT_ADVANCE_AXISDATA(axisdata, 1); } } } + + NIT_ADVANCE_AXISDATA(axisdata, 1); } /* Now fill in the ITERSIZE member */ - NIT_ITERSIZE(iter) = 1; - axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim) { - NIT_ITERSIZE(iter) *= NAD_SHAPE(axisdata); - - NIT_ADVANCE_AXISDATA(axisdata, 1); + NIT_ITERSIZE(iter) = broadcast_shape[0]; + for (idim = 1; idim < ndim; ++idim) { + NIT_ITERSIZE(iter) *= broadcast_shape[idim]; } /* The range defaults to everything */ NIT_ITERSTART(iter) = 0; @@ -3740,38 +3744,62 @@ npyiter_replace_axisdata(NpyIter *iter, npy_intp iiter, */ axisdata = axisdata0; - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - npy_intp i, shape; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_axes[ndim+p]; + } + else { + i = op_axes[ndim-p-1]; + } - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); + if ((npy_uintp)i < (npy_uintp)op_ndim) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } + } + } } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; + npy_intp i, shape; - if (i >= 0 && i < op_ndim) { - shape = PyArray_DIM(op, i); - if (shape != 1) { - npy_intp stride = PyArray_STRIDE(op, i); - if (p < 0) { - /* If the perm entry is negative, flip the axis */ - NAD_STRIDES(axisdata)[iiter] = -stride; - baseoffset += stride*(shape-1); - } - else { - NAD_STRIDES(axisdata)[iiter] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + shape = PyArray_DIM(op, i); + if (shape != 1) { + npy_intp stride = PyArray_STRIDE(op, i); + if (p < 0) { + /* If the perm entry is negative, flip the axis */ + NAD_STRIDES(axisdata)[iiter] = -stride; + baseoffset += stride*(shape-1); + } + else { + NAD_STRIDES(axisdata)[iiter] = stride; + } } } } @@ -4266,119 +4294,149 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS], stride = op_dtype->elsize; char reversestride[NPY_MAXDIMS], anyreverse = 0; - NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, 
ndim, niter); - npy_intp tmp_op_axes = -1; - npy_intp i, array_i[NPY_MAXDIMS]; + NpyIter_AxisData *axisdata; + npy_intp sizeof_axisdata; + npy_intp i; PyArrayObject *ret; - /* - * If it's an automatically allocated output, start by assuming - * the shape will have the same length as the iterator - */ - if (shape == NULL) { - /* - * If it's a scalar output, trigger scalar allocation below - * by making an op_axes of -1 - */ - if (op_ndim == 0 && ndim == 1 && op_axes == NULL) { - op_axes = &tmp_op_axes; + /* If it's a scalar, don't need to check the axes */ + if (op_ndim == 0) { + Py_INCREF(op_dtype); + ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0, + NULL, NULL, NULL, 0, NULL); + + /* Double-check that the subtype didn't mess with the dimensions */ + if (PyArray_NDIM(ret) != 0) { + PyErr_SetString(PyExc_RuntimeError, + "Iterator automatic output has an array subtype " + "which changed the dimensions of the output"); + Py_DECREF(ret); + return NULL; } - op_ndim = ndim; - } - /* Initially no strides have been set */ - for (idim = 0; idim < op_ndim; ++idim) { - strides[idim] = NPY_MAX_INTP; - reversestride[idim] = 0; + return ret; } - for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { - char p; - - /* Apply the perm to get the original axis */ - p = perm[idim]; - if (p < 0) { - i = ndim+p; - } - else { - i = ndim-p-1; - } + axisdata = NIT_AXISDATA(iter); + sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Apply op_axes */ - if (op_axes != NULL) { - i = op_axes[i]; - } - else { - i -= (ndim - op_ndim); - } + memset(reversestride, 0, NPY_MAXDIMS); + /* Initialize the strides to invalid values */ + for (i = 0; i < NPY_MAXDIMS; ++i) { + strides[i] = NPY_MAX_INTP; + } - array_i[idim] = i; + if (op_axes != NULL) { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - if (i >= 0) { - NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " - "for iterator dimension %d to %d\n", (int)i, - (int)idim, (int)stride); - strides[i] = stride; + /* Apply the perm to get the original axis */ + p = perm[idim]; if (p < 0) { - reversestride[i] = 1; - anyreverse = 1; - } - if (shape == NULL) { - new_shape[i] = NAD_SHAPE(axisdata); - stride *= new_shape[i]; + i = op_axes[ndim+p]; } else { - stride *= shape[i]; + i = op_axes[ndim-p-1]; } - } - } - /* - * If custom axes were specified, some dimensions may not have been used. - * Add the REDUCE itflag if this creates a reduction situation. 
- */ - if (shape == NULL) { - axisdata = NIT_AXISDATA(iter); - for (idim = 0; idim < op_ndim; ++idim) { - i = array_i[idim]; - NPY_IT_DBG_PRINTF("Iterator: Checking output " - "dimension %d (iterator dim %d) with stride %d\n", - (int)i, (int)idim, (int)strides[idim]); - if (i < 0) { - NPY_IT_DBG_PRINTF("Iterator: The axis wasn't used, " - "and its dimension is %d\n", - (int)NAD_SHAPE(axisdata)); - /* - * If deleting this axis produces a reduction, but - * reduction wasn't enabled, throw an error - */ - if (NAD_SHAPE(axisdata) != 1) { - if (!(flags&NPY_ITER_REDUCE_OK)) { - PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "reduction is not enabled"); - return NULL; - } - if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + if (i >= ndim) { PyErr_SetString(PyExc_ValueError, - "output requires a reduction, but " - "is flagged as write-only, not read-write"); + "automatically allocated output array " + "specified with an inconsistent axis mapping"); return NULL; } + } + else { + stride *= shape[i]; + } + } + else { + if (shape == NULL) { + /* + * If deleting this axis produces a reduction, but + * reduction wasn't enabled, throw an error + */ + if (NAD_SHAPE(axisdata) != 1) { + if (!(flags&NPY_ITER_REDUCE_OK)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "reduction is not enabled"); + return NULL; + } + if (!((*op_itflags)&NPY_OP_ITFLAG_READ)) { + PyErr_SetString(PyExc_ValueError, + "output requires a reduction, but " + "is flagged as write-only, not read-write"); + return NULL; + } - NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " - "is occurring\n"); - /* Indicate that a reduction is occurring */ - NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; - (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + NPY_IT_DBG_PRINTF("Iterator: Indicating that a reduction " + "is occurring\n"); + /* Indicate that a reduction is occurring */ + NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE; + (*op_itflags) |= NPY_OP_ITFLAG_REDUCE; + } } } + } + } + else { + for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) { + char p; - NIT_ADVANCE_AXISDATA(axisdata, 1); + /* Apply the perm to get the original axis */ + p = perm[idim]; + if (p < 0) { + i = op_ndim+p; + } + else { + i = op_ndim-p-1; + } + + if (i >= 0) { + NPY_IT_DBG_PRINTF("Iterator: Setting allocated stride %d " + "for iterator dimension %d to %d\n", (int)i, + (int)idim, (int)stride); + strides[i] = stride; + if (p < 0) { + reversestride[i] = 1; + anyreverse = 1; + } + else { + reversestride[i] = 0; + } + if (shape == NULL) { + new_shape[i] = NAD_SHAPE(axisdata); + stride *= new_shape[i]; + } + else { + stride *= shape[i]; + } + } } + } + /* + * If custom axes were specified, some dimensions may not have been used. + * Add the REDUCE itflag if this creates a reduction situation. 
+ */ + if (shape == NULL) { /* Ensure there are no dimension gaps in op_axes, and find op_ndim */ op_ndim = ndim; if (op_axes != NULL) { @@ -4395,7 +4453,7 @@ npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype, */ else if (op_ndim != ndim) { PyErr_SetString(PyExc_ValueError, - "automatically allocated reduction output array " + "automatically allocated output array " "specified with an inconsistent axis mapping"); return NULL; } @@ -4682,49 +4740,54 @@ npyiter_allocate_arrays(NpyIter *iter, * the inner stride of this operand works for the whole * array, we can set NPY_OP_ITFLAG_BUFNEVER. */ - if (PyArray_NDIM(op[iiter]) > 0 && (itflags&NPY_ITFLAG_BUFFER) && - !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { - npy_intp stride, shape, innerstride = 0, innershape; + if ((itflags&NPY_ITFLAG_BUFFER) && !(op_itflags[iiter]&NPY_OP_ITFLAG_CAST)) { NpyIter_AxisData *axisdata = NIT_AXISDATA(iter); - npy_intp sizeof_axisdata = - NIT_AXISDATA_SIZEOF(itflags, ndim, niter); - /* Find stride of the first non-empty shape */ - for (idim = 0; idim < ndim; ++idim) { - innershape = NAD_SHAPE(axisdata); - if (innershape != 1) { - innerstride = NAD_STRIDES(axisdata)[iiter]; - break; - } - NIT_ADVANCE_AXISDATA(axisdata, 1); + if (ndim == 1) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = NAD_STRIDES(axisdata)[iiter]; } - ++idim; - NIT_ADVANCE_AXISDATA(axisdata, 1); - /* Check that everything could have coalesced together */ - for (; idim < ndim; ++idim) { - stride = NAD_STRIDES(axisdata)[iiter]; - shape = NAD_SHAPE(axisdata); - if (shape != 1) { - /* - * If N times the inner stride doesn't equal this - * stride, the multi-dimensionality is needed. - */ - if (innerstride*innershape != stride) { + else if (PyArray_NDIM(op[iiter]) > 0) { + npy_intp stride, shape, innerstride = 0, innershape; + npy_intp sizeof_axisdata = + NIT_AXISDATA_SIZEOF(itflags, ndim, niter); + /* Find stride of the first non-empty shape */ + for (idim = 0; idim < ndim; ++idim) { + innershape = NAD_SHAPE(axisdata); + if (innershape != 1) { + innerstride = NAD_STRIDES(axisdata)[iiter]; break; } - else { - innershape *= shape; - } + NIT_ADVANCE_AXISDATA(axisdata, 1); } + ++idim; NIT_ADVANCE_AXISDATA(axisdata, 1); - } - /* - * If we looped all the way to the end, one stride works. - * Set that stride, because it may not belong to the first - * dimension. - */ - if (idim == ndim) { - op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; - NBF_STRIDES(bufferdata)[iiter] = innerstride; + /* Check that everything could have coalesced together */ + for (; idim < ndim; ++idim) { + stride = NAD_STRIDES(axisdata)[iiter]; + shape = NAD_SHAPE(axisdata); + if (shape != 1) { + /* + * If N times the inner stride doesn't equal this + * stride, the multi-dimensionality is needed. + */ + if (innerstride*innershape != stride) { + break; + } + else { + innershape *= shape; + } + } + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + /* + * If we looped all the way to the end, one stride works. + * Set that stride, because it may not belong to the first + * dimension. 
+ */ + if (idim == ndim) { + op_itflags[iiter] |= NPY_OP_ITFLAG_BUFNEVER; + NBF_STRIDES(bufferdata)[iiter] = innerstride; + } } } } @@ -4771,6 +4834,7 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, npy_intp narrs = 0, ndtypes = 0; PyArrayObject *arrs[NPY_MAXARGS]; PyArray_Descr *dtypes[NPY_MAXARGS]; + PyArray_Descr *ret; NPY_IT_DBG_PRINTF("Iterator: Getting a common data type from operands\n"); @@ -4790,7 +4854,30 @@ npyiter_get_common_dtype(npy_intp niter, PyArrayObject **op, } } - return PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + if (narrs == 0) { + npy_intp i; + ret = dtypes[0]; + for (i = 1; i < ndtypes; ++i) { + if (ret != dtypes[i]) + break; + } + if (i == ndtypes) { + if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) { + Py_INCREF(ret); + } + else { + ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + } + else { + ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes); + } + + return ret; } static int