ENH: make some masked array methods behave more like ndarray methods

ahaldane · ahaldane · commit 3cd5514d3063 · 2015-03-22T01:27:54.000-04:00
Modified the np.ma methods sum, mean, and count to behave like the
corresponding ndarray methods. That is, they allow multiple axes in the
axis argument, now have a keepdims argument, and treat the dtype
argument more carefully.
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
@@ -28,12 +28,14 @@
 
 import numpy as np
 import numpy.core.umath as umath
+from numpy.core import multiarray as mu
 import numpy.core.numerictypes as ntypes
 from numpy import ndarray, amax, amin, iscomplexobj, bool_
 from numpy import array as narray
 from numpy.lib.function_base import angle
 from numpy.compat import getargspec, formatargspec, long, basestring
 from numpy import expand_dims as n_expand_dims
+from numpy.core._methods import _count_reduce_items
 
 if sys.version_info[0] >= 3:
     import pickle
@@ -4052,21 +4054,10 @@ def count(self, axis=None):
 
         """
         m = self._mask
-        s = self.shape
         if m is nomask:
-            if axis is None:
-                return self.size
-            else:
-                n = s[axis]
-                t = list(s)
-                del t[axis]
-                return np.full(t, n, dtype=np.intp)
-        n1 = np.size(m, axis)
-        n2 = np.sum(m, axis=axis, dtype=np.intp)
-        if axis is None:
-            return (n1 - n2)
-        else:
-            return narray(n1 - n2)
+            return _count_reduce_items(self, axis)
+        return np.sum(~m, axis=axis, dtype=np.intp)
+
     #............................................
     flatten = _arraymethod('flatten')
     #
@@ -4522,26 +4513,39 @@ def trace(self, offset=0, axis1=0, axis2=1, dtype=None, out=None):
             return D.astype(dtype).filled(0).sum(axis=None, out=out)
     trace.__doc__ = ndarray.trace.__doc__
 
-    def sum(self, axis=None, dtype=None, out=None):
+    def sum(self, axis=None, dtype=None, out=None, keepdims=False):
         """
-        Return the sum of the array elements over the given axis.
+        Sum of array elements over a given axis.
         Masked elements are set to 0 internally.
 
         Parameters
         ----------
-        axis : {None, -1, int}, optional
-            Axis along which the sum is computed. The default
-            (`axis` = None) is to compute over the flattened array.
-        dtype : {None, dtype}, optional
-            Determines the type of the returned array and of the accumulator
-            where the elements are summed. If dtype has the value None and
-            the type of a is an integer type of precision less than the default
-            platform integer, then the default platform integer precision is
-            used.  Otherwise, the dtype is the same as that of a.
-        out :  {None, ndarray}, optional
-            Alternative output array in which to place the result. It must
-            have the same shape and buffer length as the expected output
-            but the type will be cast if necessary.
+        axis : None or int or tuple of ints, optional
+            Axis or axes along which a sum is performed.
+            The default (`axis` = `None`) is to perform a sum over all
+            the dimensions of the input array. `axis` may be negative, in
+            which case it counts from the last to the first axis.
+
+            .. versionadded:: 1.10.0
+
+            If this is a tuple of ints, a sum is performed on multiple
+            axes, instead of a single axis or all the axes as before.
+        dtype : dtype, optional
+            The type of the returned array and of the accumulator in which
+            the elements are summed.  By default, the dtype of `a` is used.
+            An exception is when `a` has an integer type with less precision
+            than the default platform integer.  In that case, the default
+            platform integer is used instead.
+        out : ndarray, optional
+            Array into which the output is placed.  By default, a new array is
+            created.  If `out` is given, it must be of the appropriate shape
+            (the shape of `a` with `axis` removed, i.e.,
+            ``numpy.delete(a.shape, axis)``).  Its type is preserved. See
+            `doc.ufuncs` (Section "Output arguments") for more details.
+        keepdims : bool, optional
+            If this is set to True, the axes which are reduced are left
+            in the result as dimensions with size one. With this option,
+            the result will broadcast correctly against the input array.
 
         Returns
         -------
@@ -4572,21 +4576,25 @@ def sum(self, axis=None, dtype=None, out=None):
         newmask = _check_mask_axis(_mask, axis)
         # No explicit output
         if out is None:
-            result = self.filled(0).sum(axis, dtype=dtype)
+            result = self.filled(0).sum(axis, dtype=dtype, keepdims=keepdims)
             rndim = getattr(result, 'ndim', 0)
             if rndim:
                 result = result.view(type(self))
-                result.__setmask__(newmask)
+                if (newmask is nomask):
+                    result.__setmask__(nomask)
+                else:
+                    result.__setmask__(newmask.reshape(result.shape))
             elif newmask:
                 result = masked
             return result
         # Explicit output
-        result = self.filled(0).sum(axis, dtype=dtype, out=out)
+        result = self.filled(0).sum(axis, dtype=dtype, out=out,
+                                    keepdims=keepdims)
         if isinstance(out, MaskedArray):
             outmask = getattr(out, '_mask', nomask)
             if (outmask is nomask):
                 outmask = out._mask = make_mask_none(out.shape)
-            outmask.flat = newmask
+            outmask[()] = newmask.reshape(result.shape)
         return out
 
 
@@ -4769,34 +4777,45 @@ def cumprod(self, axis=None, dtype=None, out=None):
         return result
 
 
-    def mean(self, axis=None, dtype=None, out=None):
+    def mean(self, axis=None, dtype=None, out=None, keepdims=False):
         """
-        Returns the average of the array elements.
-
+        Compute the arithmetic mean along the specified axis.
         Masked entries are ignored.
-        The average is taken over the flattened array by default, otherwise over
-        the specified axis. Refer to `numpy.mean` for the full documentation.
+
+        Returns the average of the array elements.  The average is taken over
+        the flattened array by default, otherwise over the specified axis.
+        `float64` intermediate and return values are used for integer inputs.
 
         Parameters
         ----------
         a : array_like
             Array containing numbers whose mean is desired. If `a` is not an
             array, a conversion is attempted.
-        axis : int, optional
-            Axis along which the means are computed. The default is to compute
-            the mean of the flattened array.
-        dtype : dtype, optional
-            Type to use in computing the mean. For integer inputs, the default
-            is float64; for floating point, inputs it is the same as the input
-            dtype.
+        axis : None or int or tuple of ints, optional
+            Axis or axes along which the means are computed. The default is to
+            compute the mean of the flattened array.
+
+            .. versionadded:: 1.10.0
+
+            If this is a tuple of ints, a mean is performed over multiple axes,
+            instead of a single axis or all the axes as before.
+        dtype : data-type, optional
+            Type to use in computing the mean.  For integer inputs, the default
+            is `float64`; for floating point inputs, it is the same as the
+            input dtype.
         out : ndarray, optional
-            Alternative output array in which to place the result. It must have
-            the same shape as the expected output but the type will be cast if
-            necessary.
+            Alternate output array in which to place the result.  The default
+            is ``None``; if provided, it must have the same shape as the
+            expected output, but the type will be cast if necessary.
+            See `doc.ufuncs` for details.
+        keepdims : bool, optional
+            If this is set to True, the axes which are reduced are left
+            in the result as dimensions with size one. With this option,
+            the result will broadcast correctly against the input array.
 
         Returns
         -------
-        mean : ndarray, see dtype parameter above
+        m : ndarray, see dtype parameter above
             If `out=None`, returns a new array containing the mean values,
             otherwise a reference to the output array is returned.
 
@@ -4818,21 +4837,30 @@ def mean(self, axis=None, dtype=None, out=None):
 
         """
         if self._mask is nomask:
-            result = super(MaskedArray, self).mean(axis=axis, dtype=dtype)
+            result = super(MaskedArray, self).mean(axis=axis, dtype=dtype,
+                                                   keepdims=keepdims)
         else:
-            dsum = self.sum(axis=axis, dtype=dtype)
             cnt = self.count(axis=axis)
+
+            # Cast bool, unsigned int, and int to float64 by default
+            if dtype is None and issubclass(self.dtype.type,
+                                            (ntypes.integer, ntypes.bool_)):
+                dtype = mu.dtype('f8')
+
+            dsum = self.sum(axis=axis, dtype=dtype, keepdims=keepdims)
+
             if cnt.shape == () and (cnt == 0):
                 result = masked
             else:
                 result = dsum * 1. / cnt
+
         if out is not None:
-            out.flat = result
+            out[()] = result
             if isinstance(out, MaskedArray):
                 outmask = getattr(out, '_mask', nomask)
                 if (outmask is nomask):
                     outmask = out._mask = make_mask_none(out.shape)
-                outmask.flat = getattr(result, '_mask', nomask)
+                outmask[()] = getattr(result, '_mask', nomask)
             return out
         return result
 
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
@@ -825,10 +825,6 @@ def test_count_func(self):
         assert_(getmask(res) is nomask)
 
         ott= array([0., 1., 2., 3.])
-        res = count(ott, 0)
-        assert_(isinstance(res, ndarray))
-        assert_(res.dtype.type is np.intp)
-
         assert_raises(IndexError, ott.count, 1)
 
     def test_minmax_func(self):