Skip to content

ENH, MAINT: Refactor PyArray_InnerProduct to use PyArray_MatrixProduct2 #6968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 0 additions & 166 deletions numpy/core/src/multiarray/cblasfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,169 +745,3 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
Py_XDECREF(ret);
return NULL;
}


/*
* innerproduct(a,b)
*
* Returns the inner product of a and b for arrays of
* floating point types. Like the generic NumPy equivalent the product
* sum is over the last dimension of a and b.
* NB: The first argument is not conjugated.
*
* This is for use by PyArray_InnerProduct. It is assumed on entry that the
* arrays ap1 and ap2 have a common data type given by typenum that is
* float, double, cfloat, or cdouble and have dimension <= 2.
* The * __numpy_ufunc__ nonsense is also assumed to
* have been taken care of.
*/

NPY_NO_EXPORT PyObject *
cblas_innerproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2)
{
int j, l, lda, ldb;
int nd;
double prior1, prior2;
PyArrayObject *ret = NULL;
npy_intp dimensions[NPY_MAXDIMS];
PyTypeObject *subtype;

/* assure contiguous arrays */
if (!PyArray_IS_C_CONTIGUOUS(ap1)) {
PyObject *op1 = PyArray_NewCopy(ap1, NPY_CORDER);
Py_DECREF(ap1);
ap1 = (PyArrayObject *)op1;
if (ap1 == NULL) {
goto fail;
}
}
if (!PyArray_IS_C_CONTIGUOUS(ap2)) {
PyObject *op2 = PyArray_NewCopy(ap2, NPY_CORDER);
Py_DECREF(ap2);
ap2 = (PyArrayObject *)op2;
if (ap2 == NULL) {
goto fail;
}
}

if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
/* One of ap1 or ap2 is a scalar */
if (PyArray_NDIM(ap1) == 0) {
/* Make ap2 the scalar */
PyArrayObject *t = ap1;
ap1 = ap2;
ap2 = t;
}
for (l = 1, j = 0; j < PyArray_NDIM(ap1); j++) {
dimensions[j] = PyArray_DIM(ap1, j);
l *= dimensions[j];
}
nd = PyArray_NDIM(ap1);
}
else {
/*
* (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
* Both ap1 and ap2 are vectors or matrices
*/
l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1);

if (PyArray_DIM(ap2, PyArray_NDIM(ap2) - 1) != l) {
dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1,
ap2, PyArray_NDIM(ap2) - 1);
goto fail;
}
nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;

if (nd == 1)
dimensions[0] = (PyArray_NDIM(ap1) == 2) ?
PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0);
else if (nd == 2) {
dimensions[0] = PyArray_DIM(ap1, 0);
dimensions[1] = PyArray_DIM(ap2, 0);
}
}

/* Choose which subtype to return */
prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0);
prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0);
subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1));

ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
typenum, NULL, NULL, 0, 0,
(PyObject *)
(prior2 > prior1 ? ap2 : ap1));

if (ret == NULL) {
goto fail;
}

NPY_BEGIN_ALLOW_THREADS;
memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret));

if (PyArray_NDIM(ap2) == 0) {
/* Multiplication by a scalar -- Level 1 BLAS */
if (typenum == NPY_DOUBLE) {
cblas_daxpy(l,
*((double *)PyArray_DATA(ap2)),
(double *)PyArray_DATA(ap1), 1,
(double *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_CDOUBLE) {
cblas_zaxpy(l,
(double *)PyArray_DATA(ap2),
(double *)PyArray_DATA(ap1), 1,
(double *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_FLOAT) {
cblas_saxpy(l,
*((float *)PyArray_DATA(ap2)),
(float *)PyArray_DATA(ap1), 1,
(float *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_CFLOAT) {
cblas_caxpy(l,
(float *)PyArray_DATA(ap2),
(float *)PyArray_DATA(ap1), 1,
(float *)PyArray_DATA(ret), 1);
}
}
else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) {
/* Dot product between two vectors -- Level 1 BLAS */
blas_dot(typenum, l,
PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1),
PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2),
PyArray_DATA(ret));
}
else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) {
/* Matrix-vector multiplication -- Level 2 BLAS */
lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
gemv(typenum, CblasRowMajor, CblasNoTrans, ap1, lda, ap2, 1, ret);
}
else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 2) {
/* Vector matrix multiplication -- Level 2 BLAS */
lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
gemv(typenum, CblasRowMajor, CblasNoTrans, ap2, lda, ap1, 1, ret);
}
else {
/*
* (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
* Matrix matrix multiplication -- Level 3 BLAS
*/
lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
gemm(typenum, CblasRowMajor, CblasNoTrans, CblasTrans,
PyArray_DIM(ap1, 0), PyArray_DIM(ap2, 0), PyArray_DIM(ap1, 1),
ap1, lda, ap2, ldb, ret);
}
NPY_END_ALLOW_THREADS;

Py_DECREF(ap1);
Py_DECREF(ap2);
return PyArray_Return(ret);

fail:
Py_XDECREF(ap1);
Py_XDECREF(ap2);
Py_XDECREF(ret);
return NULL;
}
3 changes: 0 additions & 3 deletions numpy/core/src/multiarray/cblasfuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,4 @@
NPY_NO_EXPORT PyObject *
cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);

NPY_NO_EXPORT PyObject *
cblas_innerproduct(int, PyArrayObject *, PyArrayObject *);

#endif
28 changes: 24 additions & 4 deletions numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1926,14 +1926,34 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
/* Raise an error if the casting rule isn't followed */
if (!PyArray_CanCastArrayTo(arr, newtype, casting)) {
PyObject *errmsg;
PyArray_Descr *arr_descr = NULL;
PyObject *arr_descr_repr = NULL;
PyObject *newtype_repr = NULL;

PyErr_Clear();
errmsg = PyUString_FromString("Cannot cast array data from ");
PyUString_ConcatAndDel(&errmsg,
PyObject_Repr((PyObject *)PyArray_DESCR(arr)));
arr_descr = PyArray_DESCR(arr);
if (arr_descr == NULL) {
Py_DECREF(newtype);
Py_DECREF(errmsg);
return NULL;
}
arr_descr_repr = PyObject_Repr((PyObject *)arr_descr);
if (arr_descr_repr == NULL) {
Py_DECREF(newtype);
Py_DECREF(errmsg);
return NULL;
}
PyUString_ConcatAndDel(&errmsg, arr_descr_repr);
PyUString_ConcatAndDel(&errmsg,
PyUString_FromString(" to "));
PyUString_ConcatAndDel(&errmsg,
PyObject_Repr((PyObject *)newtype));
newtype_repr = PyObject_Repr((PyObject *)newtype);
if (newtype_repr == NULL) {
Py_DECREF(newtype);
Py_DECREF(errmsg);
return NULL;
}
PyUString_ConcatAndDel(&errmsg, newtype_repr);
PyUString_ConcatAndDel(&errmsg,
PyUString_FromFormat(" according to the rule %s",
npy_casting_to_string(casting)));
Expand Down
120 changes: 34 additions & 86 deletions numpy/core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -813,121 +813,69 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
NPY_NO_EXPORT PyObject *
PyArray_InnerProduct(PyObject *op1, PyObject *op2)
{
PyArrayObject *ap1, *ap2, *ret = NULL;
PyArrayIterObject *it1, *it2;
npy_intp i, j, l;
int typenum, nd, axis;
npy_intp is1, is2, os;
char *op;
npy_intp dimensions[NPY_MAXDIMS];
PyArray_DotFunc *dot;
PyArray_Descr *typec;
NPY_BEGIN_THREADS_DEF;
PyArrayObject *ap1 = NULL;
PyArrayObject *ap2 = NULL;
int typenum;
PyArray_Descr *typec = NULL;
PyObject* ap2t = NULL;
npy_intp dims[NPY_MAXDIMS];
PyArray_Dims newaxes = {dims, 0};
int i;
PyObject* ret = NULL;

typenum = PyArray_ObjectType(op1, 0);
typenum = PyArray_ObjectType(op2, typenum);

typec = PyArray_DescrFromType(typenum);
if (typec == NULL) {
return NULL;
goto fail;
}

Py_INCREF(typec);
ap1 = (PyArrayObject *)PyArray_FromAny(op1, typec, 0, 0,
NPY_ARRAY_ALIGNED, NULL);
NPY_ARRAY_ALIGNED, NULL);
if (ap1 == NULL) {
Py_DECREF(typec);
return NULL;
goto fail;
}
ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0,
NPY_ARRAY_ALIGNED, NULL);
NPY_ARRAY_ALIGNED, NULL);
if (ap2 == NULL) {
Py_DECREF(ap1);
return NULL;
}

#if defined(HAVE_CBLAS)
if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 &&
(NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum ||
NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) {
return cblas_innerproduct(typenum, ap1, ap2);
}
#endif

if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2);
ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply(
(PyObject *)ap1, (PyObject *)ap2);
Py_DECREF(ap1);
Py_DECREF(ap2);
return (PyObject *)ret;
}

l = PyArray_DIMS(ap1)[PyArray_NDIM(ap1) - 1];
if (PyArray_DIMS(ap2)[PyArray_NDIM(ap2) - 1] != l) {
dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1,
ap2, PyArray_NDIM(ap2) - 1);
goto fail;
}

nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
j = 0;
for (i = 0; i < PyArray_NDIM(ap1) - 1; i++) {
dimensions[j++] = PyArray_DIMS(ap1)[i];
newaxes.len = PyArray_NDIM(ap2);
if ((PyArray_NDIM(ap1) >= 1) && (newaxes.len >= 2)) {
for (i = 0; i < newaxes.len - 2; i++) {
dims[i] = (npy_intp)i;
}
dims[newaxes.len - 2] = newaxes.len - 1;
dims[newaxes.len - 1] = newaxes.len - 2;

ap2t = PyArray_Transpose(ap2, &newaxes);
if (ap2t == NULL) {
goto fail;
}
}
for (i = 0; i < PyArray_NDIM(ap2) - 1; i++) {
dimensions[j++] = PyArray_DIMS(ap2)[i];
else {
ap2t = (PyObject *)ap2;
Py_INCREF(ap2);
}

/*
* Need to choose an output array that can hold a sum
* -- use priority to determine which subtype.
*/
ret = new_array_for_sum(ap1, ap2, NULL, nd, dimensions, typenum);
ret = PyArray_MatrixProduct2((PyObject *)ap1, ap2t, NULL);
if (ret == NULL) {
goto fail;
}
/* Ensure that multiarray.inner(<Nx0>,<Mx0>) -> zeros((N,M)) */
if (PyArray_SIZE(ap1) == 0 && PyArray_SIZE(ap2) == 0) {
memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret));
}

dot = (PyArray_DESCR(ret)->f->dotfunc);
if (dot == NULL) {
PyErr_SetString(PyExc_ValueError,
"dot not available for this type");
goto fail;
}
is1 = PyArray_STRIDES(ap1)[PyArray_NDIM(ap1) - 1];
is2 = PyArray_STRIDES(ap2)[PyArray_NDIM(ap2) - 1];
op = PyArray_DATA(ret);
os = PyArray_DESCR(ret)->elsize;
axis = PyArray_NDIM(ap1) - 1;
it1 = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)ap1, &axis);
axis = PyArray_NDIM(ap2) - 1;
it2 = (PyArrayIterObject *) PyArray_IterAllButAxis((PyObject *)ap2, &axis);
NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(ap2));
while (it1->index < it1->size) {
while (it2->index < it2->size) {
dot(it1->dataptr, is1, it2->dataptr, is2, op, l, ret);
op += os;
PyArray_ITER_NEXT(it2);
}
PyArray_ITER_NEXT(it1);
PyArray_ITER_RESET(it2);
}
NPY_END_THREADS_DESCR(PyArray_DESCR(ap2));
Py_DECREF(it1);
Py_DECREF(it2);
if (PyErr_Occurred()) {
goto fail;
}

Py_DECREF(ap1);
Py_DECREF(ap2);
return (PyObject *)ret;
Py_DECREF(ap2t);
return ret;

fail:
Py_XDECREF(ap1);
Py_XDECREF(ap2);
Py_XDECREF(ap2t);
Py_XDECREF(ret);
return NULL;
}
Expand Down
Loading