Skip to content

Commit 3cd5514

Browse files
committed
ENH: make some masked array methods behave more like ndarray methods
Modified the np.ma methods sum, mean, and count to behave like the corresponding ndarray methods. That is, they now accept multiple axes in the axis argument, take a keepdims argument, and treat the dtype argument more carefully.
1 parent 36f742d commit 3cd5514

File tree

2 files changed

+81
-57
lines changed

2 files changed

+81
-57
lines changed

numpy/ma/core.py

+81-53
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@
2828

2929
import numpy as np
3030
import numpy.core.umath as umath
31+
from numpy.core import multiarray as mu
3132
import numpy.core.numerictypes as ntypes
3233
from numpy import ndarray, amax, amin, iscomplexobj, bool_
3334
from numpy import array as narray
3435
from numpy.lib.function_base import angle
3536
from numpy.compat import getargspec, formatargspec, long, basestring
3637
from numpy import expand_dims as n_expand_dims
38+
from numpy.core._methods import _count_reduce_items
3739

3840
if sys.version_info[0] >= 3:
3941
import pickle
@@ -4052,21 +4054,10 @@ def count(self, axis=None):
40524054
40534055
"""
40544056
m = self._mask
4055-
s = self.shape
40564057
if m is nomask:
4057-
if axis is None:
4058-
return self.size
4059-
else:
4060-
n = s[axis]
4061-
t = list(s)
4062-
del t[axis]
4063-
return np.full(t, n, dtype=np.intp)
4064-
n1 = np.size(m, axis)
4065-
n2 = np.sum(m, axis=axis, dtype=np.intp)
4066-
if axis is None:
4067-
return (n1 - n2)
4068-
else:
4069-
return narray(n1 - n2)
4058+
return _count_reduce_items(self, axis)
4059+
return np.sum(~m, axis=axis, dtype=np.intp)
4060+
40704061
#............................................
40714062
flatten = _arraymethod('flatten')
40724063
#
@@ -4522,26 +4513,39 @@ def trace(self, offset=0, axis1=0, axis2=1, dtype=None, out=None):
45224513
return D.astype(dtype).filled(0).sum(axis=None, out=out)
45234514
trace.__doc__ = ndarray.trace.__doc__
45244515

4525-
def sum(self, axis=None, dtype=None, out=None):
4516+
def sum(self, axis=None, dtype=None, out=None, keepdims=False):
45264517
"""
4527-
Return the sum of the array elements over the given axis.
4518+
Sum of array elements over a given axis.
45284519
Masked elements are set to 0 internally.
45294520
45304521
Parameters
45314522
----------
4532-
axis : {None, -1, int}, optional
4533-
Axis along which the sum is computed. The default
4534-
(`axis` = None) is to compute over the flattened array.
4535-
dtype : {None, dtype}, optional
4536-
Determines the type of the returned array and of the accumulator
4537-
where the elements are summed. If dtype has the value None and
4538-
the type of a is an integer type of precision less than the default
4539-
platform integer, then the default platform integer precision is
4540-
used. Otherwise, the dtype is the same as that of a.
4541-
out : {None, ndarray}, optional
4542-
Alternative output array in which to place the result. It must
4543-
have the same shape and buffer length as the expected output
4544-
but the type will be cast if necessary.
4523+
axis : None or int or tuple of ints, optional
4524+
Axis or axes along which a sum is performed.
4525+
The default (`axis` = `None`) is to perform a sum over all
4526+
the dimensions of the input array. `axis` may be negative, in
4527+
which case it counts from the last to the first axis.
4528+
4529+
.. versionadded:: 1.10.0
4530+
4531+
If this is a tuple of ints, a sum is performed on multiple
4532+
axes, instead of a single axis or all the axes as before.
4533+
dtype : dtype, optional
4534+
The type of the returned array and of the accumulator in which
4535+
the elements are summed. By default, the dtype of `a` is used.
4536+
An exception is when `a` has an integer type with less precision
4537+
than the default platform integer. In that case, the default
4538+
platform integer is used instead.
4539+
out : ndarray, optional
4540+
Array into which the output is placed. By default, a new array is
4541+
created. If `out` is given, it must be of the appropriate shape
4542+
(the shape of `a` with `axis` removed, i.e.,
4543+
``numpy.delete(a.shape, axis)``). Its type is preserved. See
4544+
`doc.ufuncs` (Section "Output arguments") for more details.
4545+
keepdims : bool, optional
4546+
If this is set to True, the axes which are reduced are left
4547+
in the result as dimensions with size one. With this option,
4548+
the result will broadcast correctly against the original `arr`.
45454549
45464550
Returns
45474551
-------
@@ -4572,21 +4576,25 @@ def sum(self, axis=None, dtype=None, out=None):
45724576
newmask = _check_mask_axis(_mask, axis)
45734577
# No explicit output
45744578
if out is None:
4575-
result = self.filled(0).sum(axis, dtype=dtype)
4579+
result = self.filled(0).sum(axis, dtype=dtype, keepdims=keepdims)
45764580
rndim = getattr(result, 'ndim', 0)
45774581
if rndim:
45784582
result = result.view(type(self))
4579-
result.__setmask__(newmask)
4583+
if (newmask is nomask):
4584+
result.__setmask__(nomask)
4585+
else:
4586+
result.__setmask__(newmask.reshape(result.shape))
45804587
elif newmask:
45814588
result = masked
45824589
return result
45834590
# Explicit output
4584-
result = self.filled(0).sum(axis, dtype=dtype, out=out)
4591+
result = self.filled(0).sum(axis, dtype=dtype, out=out,
4592+
keepdims=keepdims)
45854593
if isinstance(out, MaskedArray):
45864594
outmask = getattr(out, '_mask', nomask)
45874595
if (outmask is nomask):
45884596
outmask = out._mask = make_mask_none(out.shape)
4589-
outmask.flat = newmask
4597+
outmask[()] = newmask.reshape(result.shape)
45904598
return out
45914599

45924600

@@ -4769,34 +4777,45 @@ def cumprod(self, axis=None, dtype=None, out=None):
47694777
return result
47704778

47714779

4772-
def mean(self, axis=None, dtype=None, out=None):
4780+
def mean(self, axis=None, dtype=None, out=None, keepdims=False):
47734781
"""
4774-
Returns the average of the array elements.
4775-
4782+
Compute the arithmetic mean along the specified axis.
47764783
Masked entries are ignored.
4777-
The average is taken over the flattened array by default, otherwise over
4778-
the specified axis. Refer to `numpy.mean` for the full documentation.
4784+
4785+
Returns the average of the array elements. The average is taken over
4786+
the flattened array by default, otherwise over the specified axis.
4787+
`float64` intermediate and return values are used for integer inputs.
47794788
47804789
Parameters
47814790
----------
47824791
a : array_like
47834792
Array containing numbers whose mean is desired. If `a` is not an
47844793
array, a conversion is attempted.
4785-
axis : int, optional
4786-
Axis along which the means are computed. The default is to compute
4787-
the mean of the flattened array.
4788-
dtype : dtype, optional
4789-
Type to use in computing the mean. For integer inputs, the default
4790-
is float64; for floating point, inputs it is the same as the input
4791-
dtype.
4794+
axis : None or int or tuple of ints, optional
4795+
Axis or axes along which the means are computed. The default is to
4796+
compute the mean of the flattened array.
4797+
4798+
.. versionadded:: 1.10.0
4799+
4800+
If this is a tuple of ints, a mean is performed over multiple axes,
4801+
instead of a single axis or all the axes as before.
4802+
dtype : data-type, optional
4803+
Type to use in computing the mean. For integer inputs, the default
4804+
is `float64`; for floating point inputs, it is the same as the
4805+
input dtype.
47924806
out : ndarray, optional
4793-
Alternative output array in which to place the result. It must have
4794-
the same shape as the expected output but the type will be cast if
4795-
necessary.
4807+
Alternate output array in which to place the result. The default
4808+
is ``None``; if provided, it must have the same shape as the
4809+
expected output, but the type will be cast if necessary.
4810+
See `doc.ufuncs` for details.
4811+
keepdims : bool, optional
4812+
If this is set to True, the axes which are reduced are left
4813+
in the result as dimensions with size one. With this option,
4814+
the result will broadcast correctly against the original `arr`.
47964815
47974816
Returns
47984817
-------
4799-
mean : ndarray, see dtype parameter above
4818+
m : ndarray, see dtype parameter above
48004819
If `out=None`, returns a new array containing the mean values,
48014820
otherwise a reference to the output array is returned.
48024821
@@ -4818,21 +4837,30 @@ def mean(self, axis=None, dtype=None, out=None):
48184837
48194838
"""
48204839
if self._mask is nomask:
4821-
result = super(MaskedArray, self).mean(axis=axis, dtype=dtype)
4840+
result = super(MaskedArray, self).mean(axis=axis, dtype=dtype,
4841+
keepdims=keepdims)
48224842
else:
4823-
dsum = self.sum(axis=axis, dtype=dtype)
48244843
cnt = self.count(axis=axis)
4844+
4845+
# Cast bool, unsigned int, and int to float64 by default
4846+
if dtype is None and issubclass(self.dtype.type,
4847+
(ntypes.integer, ntypes.bool_)):
4848+
dtype = mu.dtype('f8')
4849+
4850+
dsum = self.sum(axis=axis, dtype=dtype, keepdims=keepdims)
4851+
48254852
if cnt.shape == () and (cnt == 0):
48264853
result = masked
48274854
else:
48284855
result = dsum * 1. / cnt
4856+
48294857
if out is not None:
4830-
out.flat = result
4858+
out[()] = result
48314859
if isinstance(out, MaskedArray):
48324860
outmask = getattr(out, '_mask', nomask)
48334861
if (outmask is nomask):
48344862
outmask = out._mask = make_mask_none(out.shape)
4835-
outmask.flat = getattr(result, '_mask', nomask)
4863+
outmask[()] = getattr(result, '_mask', nomask)
48364864
return out
48374865
return result
48384866

numpy/ma/tests/test_core.py

-4
Original file line numberDiff line numberDiff line change
@@ -825,10 +825,6 @@ def test_count_func(self):
825825
assert_(getmask(res) is nomask)
826826

827827
ott= array([0., 1., 2., 3.])
828-
res = count(ott, 0)
829-
assert_(isinstance(res, ndarray))
830-
assert_(res.dtype.type is np.intp)
831-
832828
assert_raises(IndexError, ott.count, 1)
833829

834830
def test_minmax_func(self):

0 commit comments

Comments
 (0)