From 37e8d19a65834294bb4985f773a6f96803e07ed1 Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 14 Feb 2016 02:49:19 -0500 Subject: [PATCH 1/3] ENH: add extra kwargs and update doc of many MA methods Updated any, all, sum, prod, cumsum, cumprod, min, max, argmin, argmax, mean, var --- numpy/ma/core.py | 465 ++++++++++++++++++++--------------------------- 1 file changed, 201 insertions(+), 264 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index e4f1a9584b38..97dcae1cf045 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -1750,10 +1750,11 @@ def _flatsequence(sequence): return np.array([_ for _ in flattened], dtype=bool) -def _check_mask_axis(mask, axis): +def _check_mask_axis(mask, axis, keepdims=np._NoValue): "Check whether there are masked values along the given axis" + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} if mask is not nomask: - return mask.all(axis=axis) + return mask.all(axis=axis, **kwargs) return nomask @@ -4226,22 +4227,33 @@ def get_real(self): return result real = property(fget=get_real, doc="Real part") - def count(self, axis=None): + def count(self, axis=None, keepdims=np._NoValue): """ Count the non-masked elements of the array along the given axis. Parameters ---------- - axis : int, optional - Axis along which to count the non-masked elements. If `axis` is - `None`, all non-masked elements are counted. + axis : None or int or tuple of ints, optional + Axis or axes along which the count is performed. + The default (`axis` = `None`) performs the count over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + .. versionadded:: 1.10.0 + + If this is a tuple of ints, the count is performed on multiple + axes, instead of a single axis or all the axes as before. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. 
With this option, + the result will broadcast correctly against the array. Returns ------- - result : int or ndarray - If `axis` is `None`, an integer count is returned. When `axis` is - not `None`, an array with shape determined by the lengths of the - remaining axes, is returned. + result : ndarray or scalar + An array with the same shape as the input array, with the specified + axis removed. If the array is a 0-d array, or if `axis` is None, a + scalar is returned. See Also -------- @@ -4272,22 +4284,45 @@ def count(self, axis=None): array([3, 0]) """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + m = self._mask - s = self.shape + # special case for matrices (we assume no other subclasses modify + # their dimensions) + if isinstance(self.data, np.matrix): + if m is nomask: + m = np.zeros(self.shape, dtype=np.bool_) + m = m.view(type(self.data)) + if m is nomask: - if axis is None: + # compare to _count_reduce_items in _methods.py + + if self.shape is (): + if axis not in (None, 0): + raise ValueError("'axis' entry is out of bounds") + return 1 + elif axis is None: return self.size + + axes = axis if isinstance(axis, tuple) else (axis,) + items = 1 + for ax in axes: + items *= self.shape[ax] + + if kwargs.get('keepdims', False): + out_dims = list(self.shape) + for a in axes: + out_dims[a] = 1 else: - n = s[axis] - t = list(s) - del t[axis] - return np.full(t, n, dtype=np.intp) - n1 = np.size(m, axis) - n2 = np.sum(m, axis=axis, dtype=np.intp) - if axis is None: - return (n1 - n2) - else: - return narray(n1 - n2) + out_dims = [d for n,d in enumerate(self.shape) if n not in axes] + # make sure to return a 0-d array if axis is supplied + return np.full(out_dims, items, dtype=np.intp) + + # take care of the masked singleton + if self is masked: + return 0 + + return (~m).sum(axis=axis, dtype=np.intp, **kwargs) flatten = _arraymethod('flatten') @@ -4536,28 +4571,20 @@ def iscontiguous(self): """ return self.flags['CONTIGUOUS'] - def all(self, 
axis=None, out=None): + def all(self, axis=None, out=None, keepdims=np._NoValue): """ - Check if all of the elements of `a` are true. + Returns True if all elements evaluate to True. - Performs a :func:`logical_and` over the given axis and returns the result. - Masked values are considered as True during computation. - For convenience, the output array is masked where ALL the values along the - current axis are masked: if the output would have been a scalar and that - all the values are masked, then the output is `masked`. + The output array is masked where all the values along the given axis + are masked: if the output would have been a scalar and all the + values are masked, then the output is `masked`. - Parameters - ---------- - axis : {None, integer} - Axis to perform the operation over. - If None, perform over flattened array. - out : {None, array}, optional - Array into which the result can be placed. Its type is preserved - and it must be of the right shape to hold the output. + Refer to `numpy.all` for full documentation. See Also -------- - all : equivalent function + ndarray.all : corresponding function for ndarrays + numpy.all : equivalent function Examples -------- @@ -4568,50 +4595,47 @@ def all(self, axis=None, out=None): True """ - mask = _check_mask_axis(self._mask, axis) + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + mask = _check_mask_axis(self._mask, axis, **kwargs) if out is None: - d = self.filled(True).all(axis=axis).view(type(self)) + d = self.filled(True).all(axis=axis, **kwargs).view(type(self)) if d.ndim: d.__setmask__(mask) elif mask: return masked return d - self.filled(True).all(axis=axis, out=out) + self.filled(True).all(axis=axis, out=out, **kwargs) if isinstance(out, MaskedArray): if out.ndim or mask: out.__setmask__(mask) return out - def any(self, axis=None, out=None): + def any(self, axis=None, out=None, keepdims=np._NoValue): """ - Check if any of the elements of `a` are true. 
+ Returns True if any of the elements of `a` evaluate to True. - Performs a logical_or over the given axis and returns the result. Masked values are considered as False during computation. - Parameters - ---------- - axis : {None, integer} - Axis to perform the operation over. - If None, perform over flattened array and return a scalar. - out : {None, array}, optional - Array into which the result can be placed. Its type is preserved - and it must be of the right shape to hold the output. + Refer to `numpy.any` for full documentation. See Also -------- - any : equivalent function + ndarray.any : corresponding function for ndarrays + numpy.any : equivalent function """ - mask = _check_mask_axis(self._mask, axis) + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + mask = _check_mask_axis(self._mask, axis, **kwargs) if out is None: - d = self.filled(False).any(axis=axis).view(type(self)) + d = self.filled(False).any(axis=axis, **kwargs).view(type(self)) if d.ndim: d.__setmask__(mask) elif mask: d = masked return d - self.filled(False).any(axis=axis, out=out) + self.filled(False).any(axis=axis, out=out, **kwargs) if isinstance(out, MaskedArray): if out.ndim or mask: out.__setmask__(mask) @@ -4772,34 +4796,18 @@ def dot(self, b, out=None, strict=False): """ return dot(self, b, out=out, strict=strict) - def sum(self, axis=None, dtype=None, out=None): + def sum(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the sum of the array elements over the given axis. + Masked elements are set to 0 internally. - Parameters - ---------- - axis : {None, -1, int}, optional - Axis along which the sum is computed. The default - (`axis` = None) is to compute over the flattened array. - dtype : {None, dtype}, optional - Determines the type of the returned array and of the accumulator - where the elements are summed. 
If dtype has the value None and - the type of a is an integer type of precision less than the default - platform integer, then the default platform integer precision is - used. Otherwise, the dtype is the same as that of a. - out : {None, ndarray}, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output - but the type will be cast if necessary. + Refer to `numpy.sum` for full documentation. - Returns - ------- - sum_along_axis : MaskedArray or scalar - An array with the same shape as self, with the specified - axis removed. If self is a 0-d array, or if `axis` is None, a scalar - is returned. If an output array is specified, a reference to - `out` is returned. + See Also + -------- + ndarray.sum : corresponding function for ndarrays + numpy.sum : equivalent function Examples -------- @@ -4818,11 +4826,13 @@ def sum(self, axis=None, dtype=None, out=None): """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + _mask = self._mask - newmask = _check_mask_axis(_mask, axis) + newmask = _check_mask_axis(_mask, axis, **kwargs) # No explicit output if out is None: - result = self.filled(0).sum(axis, dtype=dtype) + result = self.filled(0).sum(axis, dtype=dtype, **kwargs) rndim = getattr(result, 'ndim', 0) if rndim: result = result.view(type(self)) @@ -4831,7 +4841,7 @@ def sum(self, axis=None, dtype=None, out=None): result = masked return result # Explicit output - result = self.filled(0).sum(axis, dtype=dtype, out=out) + result = self.filled(0).sum(axis, dtype=dtype, out=out, **kwargs) if isinstance(out, MaskedArray): outmask = getattr(out, '_mask', nomask) if (outmask is nomask): @@ -4841,36 +4851,13 @@ def sum(self, axis=None, dtype=None, out=None): def cumsum(self, axis=None, dtype=None, out=None): """ - Return the cumulative sum of the elements along the given axis. 
- The cumulative sum is calculated over the flattened array by - default, otherwise over the specified axis. + Return the cumulative sum of the array elements over the given axis. Masked values are set to 0 internally during the computation. However, their position is saved, and the result will be masked at the same locations. - Parameters - ---------- - axis : {None, -1, int}, optional - Axis along which the sum is computed. The default (`axis` = None) is to - compute over the flattened array. `axis` may be negative, in which case - it counts from the last to the first axis. - dtype : {None, dtype}, optional - Type of the returned array and of the accumulator in which the - elements are summed. If `dtype` is not specified, it defaults - to the dtype of `a`, unless `a` has an integer dtype with a - precision less than that of the default platform integer. In - that case, the default platform integer is used. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output - but the type will be cast if necessary. - - Returns - ------- - cumsum : ndarray. - A new array holding the result is returned unless ``out`` is - specified, in which case a reference to ``out`` is returned. + Refer to `numpy.cumsum` for full documentation. Notes ----- @@ -4879,6 +4866,11 @@ def cumsum(self, axis=None, dtype=None, out=None): Arithmetic is modular when using integer types, and no error is raised on overflow. 
+ See Also + -------- + ndarray.cumsum : corresponding function for ndarrays + numpy.cumsum : equivalent function + Examples -------- >>> marr = np.ma.array(np.arange(10), mask=[0,0,0,1,1,1,0,0,0,0]) @@ -4895,60 +4887,31 @@ def cumsum(self, axis=None, dtype=None, out=None): result.__setmask__(self._mask) return result - def prod(self, axis=None, dtype=None, out=None): + def prod(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ Return the product of the array elements over the given axis. - Masked elements are set to 1 internally for computation. - - Parameters - ---------- - axis : {None, int}, optional - Axis over which the product is taken. If None is used, then the - product is over all the array elements. - dtype : {None, dtype}, optional - Determines the type of the returned array and of the accumulator - where the elements are multiplied. If ``dtype`` has the value ``None`` - and the type of a is an integer type of precision less than the default - platform integer, then the default platform integer precision is - used. Otherwise, the dtype is the same as that of a. - out : {None, array}, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output but the type will be cast if - necessary. - Returns - ------- - product_along_axis : {array, scalar}, see dtype parameter above. - Returns an array whose shape is the same as a with the specified - axis removed. Returns a 0d array when a is 1d or axis=None. - Returns a reference to the specified output array if specified. + Masked elements are set to 1 internally for computation. - See Also - -------- - prod : equivalent function + Refer to `numpy.prod` for full documentation. Notes ----- Arithmetic is modular when using integer types, and no error is raised on overflow. 
- Examples + See Also -------- - >>> np.prod([1.,2.]) - 2.0 - >>> np.prod([1.,2.], dtype=np.int32) - 2 - >>> np.prod([[1.,2.],[3.,4.]]) - 24.0 - >>> np.prod([[1.,2.],[3.,4.]], axis=1) - array([ 2., 12.]) - + ndarray.prod : corresponding function for ndarrays + numpy.prod : equivalent function """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + _mask = self._mask - newmask = _check_mask_axis(_mask, axis) + newmask = _check_mask_axis(_mask, axis, **kwargs) # No explicit output if out is None: - result = self.filled(1).prod(axis, dtype=dtype) + result = self.filled(1).prod(axis, dtype=dtype, **kwargs) rndim = getattr(result, 'ndim', 0) if rndim: result = result.view(type(self)) @@ -4957,47 +4920,24 @@ def prod(self, axis=None, dtype=None, out=None): result = masked return result # Explicit output - result = self.filled(1).prod(axis, dtype=dtype, out=out) + result = self.filled(1).prod(axis, dtype=dtype, out=out, **kwargs) if isinstance(out, MaskedArray): outmask = getattr(out, '_mask', nomask) if (outmask is nomask): outmask = out._mask = make_mask_none(out.shape) outmask.flat = newmask return out - product = prod def cumprod(self, axis=None, dtype=None, out=None): """ - Return the cumulative product of the elements along the given axis. - The cumulative product is taken over the flattened array by - default, otherwise over the specified axis. + Return the cumulative product of the array elements over the given axis. Masked values are set to 1 internally during the computation. However, their position is saved, and the result will be masked at the same locations. - Parameters - ---------- - axis : {None, -1, int}, optional - Axis along which the product is computed. The default - (`axis` = None) is to compute over the flattened array. - dtype : {None, dtype}, optional - Determines the type of the returned array and of the accumulator - where the elements are multiplied. 
If ``dtype`` has the value ``None`` - and the type of ``a`` is an integer type of precision less than the - default platform integer, then the default platform integer precision - is used. Otherwise, the dtype is the same as that of ``a``. - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape and buffer length as the expected output - but the type will be cast if necessary. - - Returns - ------- - cumprod : ndarray - A new array holding the result is returned unless out is specified, - in which case a reference to out is returned. + Refer to `numpy.cumprod` for full documentation. Notes ----- @@ -5006,6 +4946,10 @@ def cumprod(self, axis=None, dtype=None, out=None): Arithmetic is modular when using integer types, and no error is raised on overflow. + See Also + -------- + ndarray.cumprod : corresponding function for ndarrays + numpy.cumprod : equivalent function """ result = self.filled(1).cumprod(axis=axis, dtype=dtype, out=out) if out is not None: @@ -5016,41 +4960,19 @@ def cumprod(self, axis=None, dtype=None, out=None): result.__setmask__(self._mask) return result - def mean(self, axis=None, dtype=None, out=None): + def mean(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ - Returns the average of the array elements. + Returns the average of the array elements along given axis. - Masked entries are ignored. - The average is taken over the flattened array by default, otherwise over - the specified axis. Refer to `numpy.mean` for the full documentation. + Masked entries are ignored, and result elements which are not + finite will be masked. - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the means are computed. The default is to compute - the mean of the flattened array. - dtype : dtype, optional - Type to use in computing the mean. 
For integer inputs, the default - is float64; for floating point, inputs it is the same as the input - dtype. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output but the type will be cast if - necessary. - - Returns - ------- - mean : ndarray, see dtype parameter above - If `out=None`, returns a new array containing the mean values, - otherwise a reference to the output array is returned. + Refer to `numpy.mean` for full documentation. See Also -------- - numpy.ma.mean : Equivalent function. - numpy.mean : Equivalent function on non-masked arrays. + ndarray.mean : corresponding function for ndarrays + numpy.mean : Equivalent function numpy.ma.average: Weighted average. Examples @@ -5064,11 +4986,14 @@ def mean(self, axis=None, dtype=None, out=None): 1.5 """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + if self._mask is nomask: - result = super(MaskedArray, self).mean(axis=axis, dtype=dtype) + result = super(MaskedArray, self).mean(axis=axis, + dtype=dtype, **kwargs) else: - dsum = self.sum(axis=axis, dtype=dtype) - cnt = self.count(axis=axis) + dsum = self.sum(axis=axis, dtype=dtype, **kwargs) + cnt = self.count(axis=axis, **kwargs) if cnt.shape == () and (cnt == 0): result = masked else: @@ -5123,22 +5048,38 @@ def anom(self, axis=None, dtype=None): else: return (self - expand_dims(m, axis)) - def var(self, axis=None, dtype=None, out=None, ddof=0): - "" + def var(self, axis=None, dtype=None, out=None, ddof=0, + keepdims=np._NoValue): + """ + Returns the variance of the array elements along given axis. + + Masked entries are ignored, and result elements which are not + finite will be masked. + + Refer to `numpy.var` for full documentation. 
+ + See Also + -------- + ndarray.var : corresponding function for ndarrays + numpy.var : Equivalent function + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + # Easy case: nomask, business as usual if self._mask is nomask: - return self._data.var(axis=axis, dtype=dtype, out=out, ddof=ddof) + return self._data.var(axis=axis, dtype=dtype, out=out, + ddof=ddof, **kwargs) # Some data are masked, yay! - cnt = self.count(axis=axis) - ddof - danom = self.anom(axis=axis, dtype=dtype) + cnt = self.count(axis=axis, **kwargs) - ddof + danom = self - self.mean(axis, dtype, keepdims=True) if iscomplexobj(self): danom = umath.absolute(danom) ** 2 else: danom *= danom - dvar = divide(danom.sum(axis), cnt).view(type(self)) + dvar = divide(danom.sum(axis, **kwargs), cnt).view(type(self)) # Apply the mask if it's not a scalar if dvar.ndim: - dvar._mask = mask_or(self._mask.all(axis), (cnt <= 0)) + dvar._mask = mask_or(self._mask.all(axis, **kwargs), (cnt <= 0)) dvar._update_from(self) elif getattr(dvar, '_mask', False): # Make sure that masked is returned when the scalar is masked. @@ -5165,27 +5106,40 @@ def var(self, axis=None, dtype=None, out=None, ddof=0): return dvar var.__doc__ = np.var.__doc__ - def std(self, axis=None, dtype=None, out=None, ddof=0): - "" - dvar = self.var(axis=axis, dtype=dtype, out=out, ddof=ddof) + def std(self, axis=None, dtype=None, out=None, ddof=0, + keepdims=np._NoValue): + """ + Returns the standard deviation of the array elements along given axis. + + Masked entries are ignored. + + Refer to `numpy.std` for full documentation. 
+ + See Also + -------- + ndarray.std : corresponding function for ndarrays + numpy.std : Equivalent function + """ + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + + dvar = self.var(axis, dtype, out, ddof, **kwargs) if dvar is not masked: if out is not None: np.power(out, 0.5, out=out, casting='unsafe') return out dvar = sqrt(dvar) return dvar - std.__doc__ = np.std.__doc__ def round(self, decimals=0, out=None): """ - Return an array rounded a to the given number of decimals. + Return each element rounded to the given number of decimals. Refer to `numpy.around` for full documentation. See Also -------- + ndarray.around : corresponding function for ndarrays numpy.around : equivalent function - """ result = self._data.round(decimals=decimals, out=out).view(type(self)) if result.ndim > 0: @@ -5200,7 +5154,6 @@ def round(self, decimals=0, out=None): if isinstance(out, MaskedArray): out.__setmask__(self._mask) return out - round.__doc__ = ndarray.round.__doc__ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): """ @@ -5431,7 +5384,7 @@ def sort(self, axis=-1, kind='quicksort', order=None, self._mask.flat = tmp_mask return - def min(self, axis=None, out=None, fill_value=None): + def min(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): """ Return the minimum along a given axis. @@ -5459,14 +5412,16 @@ def min(self, axis=None, out=None, fill_value=None): Returns the minimum filling value for a given datatype. 
""" + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + _mask = self._mask - newmask = _check_mask_axis(_mask, axis) + newmask = _check_mask_axis(_mask, axis, **kwargs) if fill_value is None: fill_value = minimum_fill_value(self) # No explicit output if out is None: result = self.filled(fill_value).min( - axis=axis, out=out).view(type(self)) + axis=axis, out=out, **kwargs).view(type(self)) if result.ndim: # Set the mask result.__setmask__(newmask) @@ -5477,7 +5432,7 @@ def min(self, axis=None, out=None, fill_value=None): result = masked return result # Explicit output - result = self.filled(fill_value).min(axis=axis, out=out) + result = self.filled(fill_value).min(axis=axis, out=out, **kwargs) if isinstance(out, MaskedArray): outmask = getattr(out, '_mask', nomask) if (outmask is nomask): @@ -5491,6 +5446,7 @@ def min(self, axis=None, out=None, fill_value=None): np.copyto(out, np.nan, where=newmask) return out + # unique to masked arrays def mini(self, axis=None): """ Return the array minimum along the specified axis. @@ -5530,7 +5486,7 @@ def mini(self, axis=None): else: return minimum.reduce(self, axis) - def max(self, axis=None, out=None, fill_value=None): + def max(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): """ Return the maximum along a given axis. @@ -5558,14 +5514,16 @@ def max(self, axis=None, out=None, fill_value=None): Returns the maximum filling value for a given datatype. 
""" + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} + _mask = self._mask - newmask = _check_mask_axis(_mask, axis) + newmask = _check_mask_axis(_mask, axis, **kwargs) if fill_value is None: fill_value = maximum_fill_value(self) # No explicit output if out is None: result = self.filled(fill_value).max( - axis=axis, out=out).view(type(self)) + axis=axis, out=out, **kwargs).view(type(self)) if result.ndim: # Set the mask result.__setmask__(newmask) @@ -5576,7 +5534,7 @@ def max(self, axis=None, out=None, fill_value=None): result = masked return result # Explicit output - result = self.filled(fill_value).max(axis=axis, out=out) + result = self.filled(fill_value).max(axis=axis, out=out, **kwargs) if isinstance(out, MaskedArray): outmask = getattr(out, '_mask', nomask) if (outmask is nomask): @@ -6289,24 +6247,28 @@ def __init__(self): self.compare = greater self.fill_value_func = maximum_fill_value +def min(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} -def min(obj, axis=None, out=None, fill_value=None): try: - return obj.min(axis=axis, fill_value=fill_value, out=out) + return obj.min(axis=axis, fill_value=fill_value, out=out, **kwargs) except (AttributeError, TypeError): - # If obj doesn't have a min method or if the method doesn't accept - # a fill_value argument - return asanyarray(obj).min(axis=axis, fill_value=fill_value, out=out) + # If obj doesn't have a min method, or if the method doesn't accept a + # fill_value argument + return asanyarray(obj).min(axis=axis, fill_value=fill_value, + out=out, **kwargs) min.__doc__ = MaskedArray.min.__doc__ +def max(obj, axis=None, out=None, fill_value=None, keepdims=np._NoValue): + kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} -def max(obj, axis=None, out=None, fill_value=None): try: - return obj.max(axis=axis, fill_value=fill_value, out=out) + return obj.max(axis=axis, fill_value=fill_value, 
out=out, **kwargs) except (AttributeError, TypeError): - # If obj doesn't have a max method, or if the method doesn't accept - # a fill_value argument - return asanyarray(obj).max(axis=axis, fill_value=fill_value, out=out) + # If obj doesn't have a max method, or if the method doesn't accept a + # fill_value argument + return asanyarray(obj).max(axis=axis, fill_value=fill_value, + out=out, **kwargs) max.__doc__ = MaskedArray.max.__doc__ @@ -6402,6 +6364,7 @@ def __call__(self, a, *args, **params): trace = _frommethod('trace') var = _frommethod('var') +count = _frommethod('count') def take(a, indices, axis=None, out=None, mode='raise'): """ @@ -6472,27 +6435,8 @@ def argsort(a, axis=None, kind='quicksort', order=None, fill_value=None): return d.argsort(axis, kind=kind, order=order) argsort.__doc__ = MaskedArray.argsort.__doc__ - -def argmin(a, axis=None, fill_value=None): - "Function version of the eponymous method." - if fill_value is None: - fill_value = default_fill_value(a) - d = filled(a, fill_value) - return d.argmin(axis=axis) -argmin.__doc__ = MaskedArray.argmin.__doc__ - - -def argmax(a, axis=None, fill_value=None): - "Function version of the eponymous method." - if fill_value is None: - fill_value = default_fill_value(a) - try: - fill_value = -fill_value - except: - pass - d = filled(a, fill_value) - return d.argmax(axis=axis) -argmax.__doc__ = MaskedArray.argmax.__doc__ +argmin = _frommethod('argmin') +argmax = _frommethod('argmax') def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None): @@ -6605,13 +6549,6 @@ def concatenate(arrays, axis=0): return data -def count(a, axis=None): - if isinstance(a, MaskedArray): - return a.count(axis) - return masked_array(a, copy=False).count(axis) -count.__doc__ = MaskedArray.count.__doc__ - - def diag(v, k=0): """ Extract a diagonal or construct a diagonal array. 
From f3732210c0ebb6b0cb534926410be44ae65759ed Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sun, 14 Feb 2016 03:03:12 -0500 Subject: [PATCH 2/3] TST: Unit tests for new kwd args in MA methods --- numpy/ma/tests/test_core.py | 92 ++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 7f9b36d543f7..723257d70953 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -1004,8 +1004,7 @@ def test_count_func(self): res = count(ott, 0) assert_(isinstance(res, ndarray)) assert_(res.dtype.type is np.intp) - - assert_raises(IndexError, ott.count, 1) + assert_raises(IndexError, ott.count, axis=1) def test_minmax_func(self): # Tests minimum and maximum. @@ -1279,6 +1278,16 @@ def test_methods_with_output(self): assert_(result is output) assert_(output[0] is masked) + def test_count_mean_with_matrix(self): + m = np.ma.array(np.matrix([[1,2],[3,4]]), mask=np.zeros((2,2))) + + assert_equal(m.count(axis=0).shape, (1,2)) + assert_equal(m.count(axis=1).shape, (2,1)) + + #make sure broadcasting inside mean and var works + assert_equal(m.mean(axis=0), [[2., 3.]]) + assert_equal(m.mean(axis=1), [[1.5], [3.5]]) + def test_eq_on_structured(self): # Test the equality of structured arrays ndtype = [('A', int), ('B', int)] @@ -4224,6 +4233,85 @@ def test_view_to_dtype_and_type(self): self.assertTrue(isinstance(test, np.matrix)) self.assertTrue(not isinstance(test, MaskedArray)) +class TestOptionalArgs(TestCase): + def test_ndarrayfuncs(self): + # test axis arg behaves the same as ndarray (including multiple axes) + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + # mask out last element of last dimension + m[:,:,-1] = True + a = np.ma.array(d, mask=m) + + def testaxis(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test axis arg + assert_equal(ma_f(a, axis=1)[...,:-1], 
numpy_f(d[...,:-1], axis=1)) + assert_equal(ma_f(a, axis=(0,1))[...,:-1], + numpy_f(d[...,:-1], axis=(0,1))) + + def testkeepdims(f, a, d): + numpy_f = numpy.__getattribute__(f) + ma_f = np.ma.__getattribute__(f) + + # test keepdims arg + assert_equal(ma_f(a, keepdims=True).shape, + numpy_f(d, keepdims=True).shape) + assert_equal(ma_f(a, keepdims=False).shape, + numpy_f(d, keepdims=False).shape) + + # test both at once + assert_equal(ma_f(a, axis=1, keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=1, keepdims=True)) + assert_equal(ma_f(a, axis=(0,1), keepdims=True)[...,:-1], + numpy_f(d[...,:-1], axis=(0,1), keepdims=True)) + + for f in ['sum', 'prod', 'mean', 'var', 'std']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + for f in ['min', 'max']: + testaxis(f, a, d) + + d = (np.arange(24).reshape((2,3,4))%2 == 0) + a = np.ma.array(d, mask=m) + for f in ['all', 'any']: + testaxis(f, a, d) + testkeepdims(f, a, d) + + def test_count(self): + # test np.ma.count specially + + d = np.arange(24.0).reshape((2,3,4)) + m = np.zeros(24, dtype=bool).reshape((2,3,4)) + m[:,0,:] = True + a = np.ma.array(d, mask=m) + + assert_equal(count(a), 16) + assert_equal(count(a, axis=1), 2*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 4*ones((4,))) + assert_equal(count(a, keepdims=True), 16*ones((1,1,1))) + assert_equal(count(a, axis=1, keepdims=True), 2*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 4*ones((1,1,4))) + + # check the 'nomask' path + a = np.ma.array(d, mask=nomask) + + assert_equal(count(a), 24) + assert_equal(count(a, axis=1), 3*ones((2,4))) + assert_equal(count(a, axis=(0,1)), 6*ones((4,))) + assert_equal(count(a, keepdims=True), 24*ones((1,1,1))) + assert_equal(count(a, axis=1, keepdims=True), 3*ones((2,1,4))) + assert_equal(count(a, axis=(0,1), keepdims=True), 6*ones((1,1,4))) + + # check the 'masked' singleton + assert_equal(count(np.ma.masked), 0) + + # check 0-d arrays do not allow axis > 0 + assert_raises(ValueError, count, np.ma.array(1), 
axis=1) + def test_masked_array(): a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) From f7c24297448d4d2d07ad31bbec9fefab534c6bae Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Sat, 5 Mar 2016 15:11:23 -0500 Subject: [PATCH 3/3] ENH: update MA average, median --- numpy/ma/extras.py | 149 +++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 88 deletions(-) diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index dead02e138fc..781b2544996e 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -36,6 +36,7 @@ import numpy as np from numpy import ndarray, array as nxarray import numpy.core.umath as umath +from numpy.lib.function_base import _ureduce from numpy.lib.index_tricks import AxisConcatenator @@ -471,8 +472,8 @@ def average(a, axis=None, weights=None, returned=False): Data to be averaged. Masked entries are not taken into account in the computation. axis : int, optional - Axis along which the average is computed. The default is to compute - the average of the flattened array. + Axis along which to average `a`. If `None`, averaging is done over + the flattened array. weights : array_like, optional The importance that each element has in the computation of the average. 
The weights array can either be 1-D (in which case its length must be @@ -513,97 +514,53 @@ def average(a, axis=None, weights=None, returned=False): """ a = asarray(a) - mask = a.mask - ash = a.shape - if ash == (): - ash = (1,) - if axis is None: - if mask is nomask: - if weights is None: - n = a.sum(axis=None) - d = float(a.size) - else: - w = filled(weights, 0.0).ravel() - n = umath.add.reduce(a._data.ravel() * w) - d = umath.add.reduce(w) - del w - else: - if weights is None: - n = a.filled(0).sum(axis=None) - d = float(umath.add.reduce((~mask).ravel())) - else: - w = array(filled(weights, 0.0), float, mask=mask).ravel() - n = add.reduce(a.ravel() * w) - d = add.reduce(w) - del w + m = getmask(a) + + # inspired by 'average' in numpy/lib/function_base.py + + if weights is None: + avg = a.mean(axis) + scl = avg.dtype.type(a.count(axis)) else: - if mask is nomask: - if weights is None: - d = ash[axis] * 1.0 - n = add.reduce(a._data, axis) - else: - w = filled(weights, 0.0) - wsh = w.shape - if wsh == (): - wsh = (1,) - if wsh == ash: - w = np.array(w, float, copy=0) - n = add.reduce(a * w, axis) - d = add.reduce(w, axis) - del w - elif wsh == (ash[axis],): - r = [None] * len(ash) - r[axis] = slice(None, None, 1) - w = eval("w[" + repr(tuple(r)) + "] * ones(ash, float)") - n = add.reduce(a * w, axis) - d = add.reduce(w, axis, dtype=float) - del w, r - else: - raise ValueError('average: weights wrong shape.') + wgt = np.asanyarray(weights) + + if issubclass(a.dtype.type, (np.integer, np.bool_)): + result_dtype = np.result_type(a.dtype, wgt.dtype, 'f8') else: - if weights is None: - n = add.reduce(a, axis) - d = umath.add.reduce((~mask), axis=axis, dtype=float) - else: - w = filled(weights, 0.0) - wsh = w.shape - if wsh == (): - wsh = (1,) - if wsh == ash: - w = array(w, dtype=float, mask=mask, copy=0) - n = add.reduce(a * w, axis) - d = add.reduce(w, axis, dtype=float) - elif wsh == (ash[axis],): - r = [None] * len(ash) - r[axis] = slice(None, None, 1) - w = 
eval("w[" + repr(tuple(r)) + - "] * masked_array(ones(ash, float), mask)") - n = add.reduce(a * w, axis) - d = add.reduce(w, axis, dtype=float) - else: - raise ValueError('average: weights wrong shape.') - del w - if n is masked or d is masked: - return masked - result = n / d - del n - - if isinstance(result, MaskedArray): - if ((axis is None) or (axis == 0 and a.ndim == 1)) and \ - (result.mask is nomask): - result = result._data - if returned: - if not isinstance(d, MaskedArray): - d = masked_array(d) - if isinstance(d, ndarray) and (not d.shape == result.shape): - d = ones(result.shape, dtype=float) * d + result_dtype = np.result_type(a.dtype, wgt.dtype) + + # Sanity checks + if a.shape != wgt.shape: + if axis is None: + raise TypeError( + "Axis must be specified when shapes of a and weights " + "differ.") + if wgt.ndim != 1: + raise TypeError( + "1D weights expected when shapes of a and weights differ.") + if wgt.shape[0] != a.shape[axis]: + raise ValueError( + "Length of weights not compatible with specified axis.") + + # setup wgt to broadcast along axis + wgt = np.broadcast_to(wgt, (a.ndim-1)*(1,) + wgt.shape) + wgt = wgt.swapaxes(-1, axis) + + if m is not nomask: + wgt = wgt*(~a.mask) + + scl = wgt.sum(axis=axis, dtype=result_dtype) + avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl + if returned: - return result, d + if scl.shape != avg.shape: + scl = np.broadcast_to(scl, avg.shape).copy() + return avg, scl else: - return result + return avg -def median(a, axis=None, out=None, overwrite_input=False): +def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): """ Compute the median along the specified axis. @@ -628,6 +585,12 @@ def median(a, axis=None, out=None, overwrite_input=False): but it will probably be fully or partially sorted. Default is False. Note that, if `overwrite_input` is True, and the input is not already an `ndarray`, an error will be raised. 
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + .. versionadded:: 1.10.0 Returns ------- @@ -665,7 +628,17 @@ def median(a, axis=None, out=None, overwrite_input=False): """ if not hasattr(a, 'mask') or np.count_nonzero(a.mask) == 0: return masked_array(np.median(getdata(a, subok=True), axis=axis, - out=out, overwrite_input=overwrite_input), copy=False) + out=out, overwrite_input=overwrite_input, + keepdims=keepdims), copy=False) + + r, k = _ureduce(a, func=_median, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims: + return r.reshape(k) + else: + return r + +def _median(a, axis=None, out=None, overwrite_input=False): if overwrite_input: if axis is None: asorted = a.ravel()