Skip to content

ENH: Add an out argument to concatenate #9209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/release/1.14.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ selected via the ``--fcompiler`` and ``--compiler`` options to
supported; by default a gfortran-compatible static archive
``openblas.a`` is looked for.

``concatenate`` and ``stack`` gained an ``out`` argument
--------------------------------------------------------
A preallocated buffer of the desired dtype can now be used for the output of
these functions.

``np.linalg.pinv`` now works on stacked matrices
------------------------------------------------
Previously it was limited to a single 2d array.
Expand Down
6 changes: 5 additions & 1 deletion numpy/add_newdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,7 +1158,7 @@ def luf(lamdaexpr, *args, **kwargs):

add_newdoc('numpy.core.multiarray', 'concatenate',
"""
concatenate((a1, a2, ...), axis=0)
concatenate((a1, a2, ...), axis=0, out=None)

Join a sequence of arrays along an existing axis.

Expand All @@ -1169,6 +1169,10 @@ def luf(lamdaexpr, *args, **kwargs):
corresponding to `axis` (the first, by default).
axis : int, optional
The axis along which the arrays will be joined. Default is 0.
out : ndarray, optional
If provided, the destination to place the result. The shape must be
correct, matching that of what concatenate would have returned if no
out argument were specified.

Returns
-------
Expand Down
8 changes: 6 additions & 2 deletions numpy/core/shape_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def hstack(tup):
return _nx.concatenate(arrs, 1)


def stack(arrays, axis=0):
def stack(arrays, axis=0, out=None):
"""
Join a sequence of arrays along a new axis.

Expand All @@ -309,6 +309,10 @@ def stack(arrays, axis=0):
Each array must have the same shape.
axis : int, optional
The axis in the result array along which the input arrays are stacked.
out : ndarray, optional
If provided, the destination to place the result. The shape must be
correct, matching that of what stack would have returned if no
out argument were specified.

Returns
-------
Expand Down Expand Up @@ -358,7 +362,7 @@ def stack(arrays, axis=0):

sl = (slice(None),) * axis + (_nx.newaxis,)
expanded_arrays = [arr[sl] for arr in arrays]
return _nx.concatenate(expanded_arrays, axis=axis)
return _nx.concatenate(expanded_arrays, axis=axis, out=out)


class _Recurser(object):
Expand Down
233 changes: 140 additions & 93 deletions numpy/core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,20 +315,39 @@ PyArray_Free(PyObject *op, void *ptr)
return 0;
}

/*
* Get the ndarray subclass with the highest priority
*/
NPY_NO_EXPORT PyTypeObject *
PyArray_GetSubType(int narrays, PyArrayObject **arrays) {
PyTypeObject *subtype = &PyArray_Type;
double priority = NPY_PRIORITY;
int i;

/* Get the priority subtype for the array */
for (i = 0; i < narrays; ++i) {
if (Py_TYPE(arrays[i]) != subtype) {
double pr = PyArray_GetPriority((PyObject *)(arrays[i]), 0.0);
if (pr > priority) {
priority = pr;
subtype = Py_TYPE(arrays[i]);
}
}
}

return subtype;
}


/*
* Concatenates a list of ndarrays.
*/
NPY_NO_EXPORT PyArrayObject *
PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis)
PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis,
PyArrayObject* ret)
{
PyTypeObject *subtype = &PyArray_Type;
double priority = NPY_PRIORITY;
int iarrays, idim, ndim;
npy_intp shape[NPY_MAXDIMS], s, strides[NPY_MAXDIMS];
int strideperm[NPY_MAXDIMS];
PyArray_Descr *dtype = NULL;
PyArrayObject *ret = NULL;
npy_intp shape[NPY_MAXDIMS];
PyArrayObject_fields *sliding_view = NULL;

if (narrays <= 0) {
Expand Down Expand Up @@ -383,47 +402,57 @@ PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis)
}
}

/* Get the priority subtype for the array */
for (iarrays = 0; iarrays < narrays; ++iarrays) {
if (Py_TYPE(arrays[iarrays]) != subtype) {
double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
if (pr > priority) {
priority = pr;
subtype = Py_TYPE(arrays[iarrays]);
}
if (ret != NULL) {
if (PyArray_NDIM(ret) != ndim) {
PyErr_SetString(PyExc_ValueError,
"Output array has wrong dimensionality");
return NULL;
}
if (!PyArray_CompareLists(shape, PyArray_SHAPE(ret), ndim)) {
PyErr_SetString(PyExc_ValueError,
"Output array is the wrong shape");
return NULL;
}
Py_INCREF(ret);
}
else {
npy_intp s, strides[NPY_MAXDIMS];
int strideperm[NPY_MAXDIMS];

/* Get the resulting dtype from combining all the arrays */
dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
if (dtype == NULL) {
return NULL;
}
/* Get the priority subtype for the array */
PyTypeObject *subtype = PyArray_GetSubType(narrays, arrays);

/*
* Figure out the permutation to apply to the strides to match
* the memory layout of the input arrays, using ambiguity
* resolution rules matching that of the NpyIter.
*/
PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm);
s = dtype->elsize;
for (idim = ndim-1; idim >= 0; --idim) {
int iperm = strideperm[idim];
strides[iperm] = s;
s *= shape[iperm];
}

/* Allocate the array for the result. This steals the 'dtype' reference. */
ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
dtype,
ndim,
shape,
strides,
NULL,
0,
NULL);
if (ret == NULL) {
return NULL;
/* Get the resulting dtype from combining all the arrays */
PyArray_Descr *dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
if (dtype == NULL) {
return NULL;
}

/*
* Figure out the permutation to apply to the strides to match
* the memory layout of the input arrays, using ambiguity
* resolution rules matching that of the NpyIter.
*/
PyArray_CreateMultiSortedStridePerm(narrays, arrays, ndim, strideperm);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

incidentally, this is the only place in numpy this function is used.

For a second I thought there might be some problem that we ignore strides when out is provided, like in #7633, but it's fine, PyArray_AssignArray below is insensitive to order.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I assume this is an optimization

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember this function being new in 1.7 or so, somewhat thought it was used with the new iterator, but maybe there was some change there (since I also remember there was a some bug with this or its usage). The idea is simple though, find the cache friendliest iteration order....

s = dtype->elsize;
for (idim = ndim-1; idim >= 0; --idim) {
int iperm = strideperm[idim];
strides[iperm] = s;
s *= shape[iperm];
}

/* Allocate the array for the result. This steals the 'dtype' reference. */
ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
dtype,
ndim,
shape,
strides,
NULL,
0,
NULL);
if (ret == NULL) {
return NULL;
}
}

/*
Expand Down Expand Up @@ -462,15 +491,10 @@ PyArray_ConcatenateArrays(int narrays, PyArrayObject **arrays, int axis)
*/
NPY_NO_EXPORT PyArrayObject *
PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays,
NPY_ORDER order)
NPY_ORDER order, PyArrayObject *ret)
{
PyTypeObject *subtype = &PyArray_Type;
double priority = NPY_PRIORITY;
int iarrays;
npy_intp stride;
npy_intp shape = 0;
PyArray_Descr *dtype = NULL;
PyArrayObject *ret = NULL;
PyArrayObject_fields *sliding_view = NULL;

if (narrays <= 0) {
Expand All @@ -494,36 +518,45 @@ PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays,
}
}

/* Get the priority subtype for the array */
for (iarrays = 0; iarrays < narrays; ++iarrays) {
if (Py_TYPE(arrays[iarrays]) != subtype) {
double pr = PyArray_GetPriority((PyObject *)(arrays[iarrays]), 0.0);
if (pr > priority) {
priority = pr;
subtype = Py_TYPE(arrays[iarrays]);
}
if (ret != NULL) {
if (PyArray_NDIM(ret) != 1) {
PyErr_SetString(PyExc_ValueError,
"Output array must be 1D");
return NULL;
}
if (shape != PyArray_SIZE(ret)) {
PyErr_SetString(PyExc_ValueError,
"Output array is the wrong size");
return NULL;
}
Py_INCREF(ret);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this if statement

else {
npy_intp stride;

/* Get the resulting dtype from combining all the arrays */
dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
if (dtype == NULL) {
return NULL;
}
/* Get the priority subtype for the array */
PyTypeObject *subtype = PyArray_GetSubType(narrays, arrays);

stride = dtype->elsize;
/* Get the resulting dtype from combining all the arrays */
PyArray_Descr *dtype = PyArray_ResultType(narrays, arrays, 0, NULL);
if (dtype == NULL) {
return NULL;
}

/* Allocate the array for the result. This steals the 'dtype' reference. */
ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
dtype,
1,
&shape,
&stride,
NULL,
0,
NULL);
if (ret == NULL) {
return NULL;
stride = dtype->elsize;

/* Allocate the array for the result. This steals the 'dtype' reference. */
ret = (PyArrayObject *)PyArray_NewFromDescr(subtype,
dtype,
1,
&shape,
&stride,
NULL,
0,
NULL);
if (ret == NULL) {
return NULL;
}
}

/*
Expand Down Expand Up @@ -558,22 +591,11 @@ PyArray_ConcatenateFlattenedArrays(int narrays, PyArrayObject **arrays,
return ret;
}


/*NUMPY_API
* Concatenate
*
* Concatenate an arbitrary Python sequence into an array.
* op is a python object supporting the sequence interface.
* Its elements will be concatenated together to form a single
* multidimensional array. If axis is NPY_MAXDIMS or bigger, then
* each sequence object will be flattened before concatenation
*/
NPY_NO_EXPORT PyObject *
PyArray_Concatenate(PyObject *op, int axis)
PyArray_ConcatenateInto(PyObject *op, int axis, PyArrayObject *ret)
{
int iarrays, narrays;
PyArrayObject **arrays;
PyArrayObject *ret;

if (!PySequence_Check(op)) {
PyErr_SetString(PyExc_TypeError,
Expand Down Expand Up @@ -606,10 +628,10 @@ PyArray_Concatenate(PyObject *op, int axis)
}

if (axis >= NPY_MAXDIMS) {
ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER);
ret = PyArray_ConcatenateFlattenedArrays(narrays, arrays, NPY_CORDER, ret);
}
else {
ret = PyArray_ConcatenateArrays(narrays, arrays, axis);
ret = PyArray_ConcatenateArrays(narrays, arrays, axis, ret);
}

for (iarrays = 0; iarrays < narrays; ++iarrays) {
Expand All @@ -629,6 +651,21 @@ PyArray_Concatenate(PyObject *op, int axis)
return NULL;
}

/*NUMPY_API
* Concatenate
*
* Concatenate an arbitrary Python sequence into an array.
* op is a python object supporting the sequence interface.
* Its elements will be concatenated together to form a single
* multidimensional array. If axis is NPY_MAXDIMS or bigger, then
* each sequence object will be flattened before concatenation
*/
NPY_NO_EXPORT PyObject *
PyArray_Concatenate(PyObject *op, int axis)
{
return PyArray_ConcatenateInto(op, axis, NULL);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here we just redirect the old API to not break compatibility


static int
_signbit_set(PyArrayObject *arr)
{
Expand Down Expand Up @@ -2159,14 +2196,24 @@ static PyObject *
array_concatenate(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
{
PyObject *a0;
PyObject *out = NULL;
int axis = 0;
static char *kwlist[] = {"seq", "axis", NULL};
static char *kwlist[] = {"seq", "axis", "out", NULL};

if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&:concatenate", kwlist,
&a0, PyArray_AxisConverter, &axis)) {
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O&O:concatenate", kwlist,
&a0, PyArray_AxisConverter, &axis, &out)) {
return NULL;
}
return PyArray_Concatenate(a0, axis);
if (out != NULL) {
if (out == Py_None) {
out = NULL;
}
else if (!PyArray_Check(out)) {
PyErr_SetString(PyExc_TypeError, "'out' must be an array");
return NULL;
}
}
return PyArray_ConcatenateInto(a0, axis, (PyArrayObject *)out);
}

static PyObject *
Expand Down
Loading