Skip to content

ENH: core: Generalize ndarray.astype to take new standard keyword arguments #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 3, 2011
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions numpy/add_newdocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3001,14 +3001,39 @@ def luf(lamdaexpr, *args, **kwargs):

add_newdoc('numpy.core.multiarray', 'ndarray', ('astype',
"""
a.astype(t)
a.astype(dtype, order='K', casting='unsafe', subok=True, copy=True)

Copy of the array, cast to a specified type.

Parameters
----------
t : str or dtype
dtype : str or dtype
Typecode or data-type to which the array is cast.
order : {'C', 'F', 'A', or 'K'}, optional
Controls the memory layout order of the result.
'C' means C order, 'F' means Fortran order, 'A'
means 'F' order if all the arrays are Fortran contiguous,
'C' order otherwise, and 'K' means as close to the
order the array elements appear in memory as possible.
Default is 'K'.
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur. Defaults to 'unsafe'
for backwards compatibility.

* 'no' means the data types should not be cast at all.
* 'equiv' means only byte-order changes are allowed.
* 'safe' means only casts which can preserve values are allowed.
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.
subok : bool, optional
If True, then sub-classes will be passed-through (default), otherwise
the returned array will be forced to be a base-class array.
copy : bool, optional
By default, astype always returns a newly allocated array. If this
is set to false, and the `dtype`, `order`, and `subok`
requirements are satisfied, the input array is returned instead
of a copy.

Raises
------
Expand Down
119 changes: 111 additions & 8 deletions numpy/core/src/multiarray/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -768,18 +768,121 @@ array_setasflat(PyArrayObject *self, PyObject *args)
Py_RETURN_NONE;
}

static PyObject *
array_astype(PyArrayObject *self, PyObject *args)
/*
 * Returns a static, quoted, human-readable name for a casting rule,
 * suitable for embedding in an error message (e.g. "'safe'").
 *
 * Any value that is not one of the five handled NPY_CASTING constants
 * yields "<unknown>". The returned pointer refers to a string literal
 * and must not be freed or modified by the caller.
 */
static const char *
npy_casting_to_string(NPY_CASTING casting)
{
    switch (casting) {
        case NPY_NO_CASTING:
            return "'no'";
        case NPY_EQUIV_CASTING:
            return "'equiv'";
        case NPY_SAFE_CASTING:
            return "'safe'";
        case NPY_SAME_KIND_CASTING:
            return "'same_kind'";
        case NPY_UNSAFE_CASTING:
            return "'unsafe'";
        default:
            return "<unknown>";
    }
}

if (!PyArg_ParseTuple(args, "O&", PyArray_DescrConverter,
&descr)) {
Py_XDECREF(descr);
/*
 * Implementation of the ndarray 'astype' method
 * (a.astype(dtype, order='K', casting='unsafe', subok=True, copy=True)).
 *
 * Arguments are parsed from args/kwds:
 *   dtype   - required; converted with PyArray_DescrConverter.
 *   order   - optional; converted with PyArray_OrderConverter,
 *             defaults to NPY_KEEPORDER.
 *   casting - optional; converted with PyArray_CastingConverter,
 *             defaults to NPY_UNSAFE_CASTING.
 *   subok   - optional int flag, defaults to 1.
 *   copy    - optional int flag, defaults to 1 (always copy).
 *
 * Returns a new reference to the resulting array, or NULL with a Python
 * exception set. A TypeError is raised when the requested cast is not
 * permitted under the given casting rule.
 */
static PyObject *
array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"dtype", "order", "casting",
                             "subok", "copy", NULL};
    PyArray_Descr *dtype = NULL;
    /*
     * TODO: UNSAFE default for compatibility, I think
     *       switching to SAME_KIND by default would be good.
     */
    NPY_CASTING casting = NPY_UNSAFE_CASTING;
    NPY_ORDER order = NPY_KEEPORDER;
    int forcecopy = 1, subok = 1;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|O&O&ii", kwlist,
                            PyArray_DescrConverter, &dtype,
                            PyArray_OrderConverter, &order,
                            PyArray_CastingConverter, &casting,
                            &subok,
                            &forcecopy)) {
        /* The converter may have produced a descr before a later failure */
        Py_XDECREF(dtype);
        return NULL;
    }

    /*
     * If the memory layout matches, the data types are equivalent,
     * and it's not a subtype when subok is False, then we
     * can skip the copy.
     */
    if (!forcecopy && (order == NPY_KEEPORDER ||
                       (order == NPY_ANYORDER &&
                            (PyArray_IS_C_CONTIGUOUS(self) ||
                             PyArray_IS_F_CONTIGUOUS(self))) ||
                       (order == NPY_CORDER &&
                            PyArray_IS_C_CONTIGUOUS(self)) ||
                       (order == NPY_FORTRANORDER &&
                            PyArray_IS_F_CONTIGUOUS(self))) &&
                (subok || PyArray_CheckExact(self)) &&
                PyArray_EquivTypes(dtype, PyArray_DESCR(self))) {
        /* Return the input itself: drop our dtype ref, add one to self */
        Py_DECREF(dtype);
        Py_INCREF(self);
        return (PyObject *)self;
    }
    else if (PyArray_CanCastArrayTo(self, dtype, casting)) {
        PyArrayObject *ret;

        /*
         * A zero element size means the target dtype has no size yet;
         * derive one from the source array's dtype.
         */
        if (dtype->elsize == 0) {
            /*
             * NOTE(review): PyArray_DESCR_REPLACE presumably swaps dtype
             * for a private copy and leaves it NULL on failure -- confirm
             * against the macro definition.
             */
            PyArray_DESCR_REPLACE(dtype);
            if (dtype == NULL) {
                return NULL;
            }

            if (dtype->type_num == PyArray_DESCR(self)->type_num ||
                                    dtype->type_num == NPY_VOID) {
                dtype->elsize = PyArray_DESCR(self)->elsize;
            }
            else if (PyArray_DESCR(self)->type_num == NPY_STRING &&
                                    dtype->type_num == NPY_UNICODE) {
                /* string -> unicode: element size is scaled up by 4 */
                dtype->elsize = PyArray_DESCR(self)->elsize * 4;
            }
            else if (PyArray_DESCR(self)->type_num == NPY_UNICODE &&
                                    dtype->type_num == NPY_STRING) {
                /* unicode -> string: element size is scaled down by 4 */
                dtype->elsize = PyArray_DESCR(self)->elsize / 4;
            }
        }

        /* This steals the reference to dtype, so no DECREF of dtype */
        ret = (PyArrayObject *)PyArray_NewLikeArray(
                                    self, order, dtype, subok);

        if (ret == NULL) {
            return NULL;
        }
        if (PyArray_CopyInto(ret, self) < 0) {
            Py_DECREF(ret);
            return NULL;
        }

        return (PyObject *)ret;
    }
    else {
        PyObject *errmsg;

        errmsg = PyUString_FromString("Cannot cast array from ");
        if (errmsg == NULL) {
            Py_DECREF(dtype);
            return NULL;
        }
        PyUString_ConcatAndDel(&errmsg,
                PyObject_Repr((PyObject *)PyArray_DESCR(self)));
        PyUString_ConcatAndDel(&errmsg,
                PyUString_FromString(" to "));
        PyUString_ConcatAndDel(&errmsg,
                PyObject_Repr((PyObject *)dtype));
        PyUString_ConcatAndDel(&errmsg,
                PyUString_FromFormat(" according to the rule %s",
                        npy_casting_to_string(casting)));
        if (errmsg != NULL) {
            /*
             * PyErr_SetObject takes its own reference to the value, so
             * ours must be released here to avoid leaking the message.
             */
            PyErr_SetObject(PyExc_TypeError, errmsg);
            Py_DECREF(errmsg);
        }
        Py_DECREF(dtype);
        return NULL;
    }
}

/* default sub-type implementation */
Expand Down Expand Up @@ -2196,7 +2299,7 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"astype",
(PyCFunction)array_astype,
METH_VARARGS, NULL},
METH_VARARGS | METH_KEYWORDS, NULL},
{"byteswap",
(PyCFunction)array_byteswap,
METH_VARARGS, NULL},
Expand Down
60 changes: 60 additions & 0 deletions numpy/core/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,65 @@ def test_fastCopyAndTranspose():
assert_equal(b, a.T)
assert_(b.flags.owndata)

def test_array_astype():
    """Exercise the order, casting, subok and copy keywords of ndarray.astype."""
    a = np.arange(6, dtype='f4').reshape(2,3)
    # Default behavior: allows unsafe casts, keeps memory layout,
    # always copies.
    b = a.astype('i4')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('i4'))
    assert_equal(a.strides, b.strides)
    b = a.T.astype('i4')
    assert_equal(a.T, b)
    assert_equal(b.dtype, np.dtype('i4'))
    assert_equal(a.T.strides, b.strides)
    b = a.astype('f4')
    assert_equal(a, b)
    assert_(a is not b)

    # copy=False parameter can sometimes skip a copy
    b = a.astype('f4', copy=False)
    assert_(a is b)

    # order parameter allows overriding of the memory layout,
    # forcing a copy if the layout is wrong
    b = a.astype('f4', order='F', copy=False)
    assert_equal(a, b)
    assert_(a is not b)
    assert_(b.flags.f_contiguous)

    b = a.astype('f4', order='C', copy=False)
    assert_equal(a, b)
    assert_(a is b)
    assert_(b.flags.c_contiguous)

    # casting parameter allows catching bad casts
    b = a.astype('c8', casting='safe')
    assert_equal(a, b)
    assert_equal(b.dtype, np.dtype('c8'))

    assert_raises(TypeError, a.astype, 'i4', casting='safe')

    # subok=False passes through a non-subclassed array
    # (an integer 0 is passed on purpose instead of False)
    b = a.astype('f4', subok=0, copy=False)
    assert_(a is b)

    a = np.matrix([[0,1,2],[3,4,5]], dtype='f4')

    # subok=True passes through a matrix
    b = a.astype('f4', subok=True, copy=False)
    assert_(a is b)

    # subok=True is default, and creates a subtype on a cast
    b = a.astype('i4', copy=False)
    assert_equal(a, b)
    assert_equal(type(b), np.matrix)

    # subok=False never returns a matrix
    b = a.astype('f4', subok=False, copy=False)
    assert_equal(a, b)
    assert_(a is not b)
    assert_(type(b) != np.matrix)

if __name__ == "__main__":
run_module_suite()