numpy · ahaldane · Nov 20, 2017 · Nov 19, 2017 · Nov 19, 2017 · Nov 19, 2017
diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst
@@ -503,3 +503,10 @@ Seeding ``RandomState`` using an array requires a 1-d array
 ``RandomState`` previously would accept empty arrays or arrays with 2 or more
 dimensions, which resulted in either a failure to seed (empty arrays) or for
 some of the passed values to be ignored when setting the seed.
+
+``MaskedArray`` objects show a more useful ``repr``
+---------------------------------------------------
+The ``repr`` of a ``MaskedArray`` is now closer to the Python code that would
+produce it, with arrays now being shown with commas and dtypes. Like the other
+formatting changes, this can be disabled with the 1.13 legacy printing mode in
+order to help transition doctests.
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
@@ -1182,6 +1182,30 @@ def dtype_is_implied(dtype):
     return dtype.type in _typelessdata
 
 
+def dtype_short_repr(dtype):
+    """
+    Convert a dtype to a short form which evaluates to the same dtype.
+
+    The intent is roughly that the following holds
+
+    >>> from numpy import *
+    >>> assert eval(dtype_short_repr(dt)) == dt
+    """
+    # handle these separately so they don't give garbage like str256
+    if issubclass(dtype.type, flexible):
+        if dtype.names:
+            return "%s" % str(dtype)
+        else:
+            return "'%s'" % str(dtype)
+
+    typename = dtype.name
+    # quote typenames which can't be represented as python variable names
+    if typename and not (typename[0].isalpha() and typename.isalnum()):
+        typename = repr(typename)
+
+    return typename
+
+
 def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
     """
     Return the string representation of an array.
@@ -1245,18 +1269,7 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
 
     if skipdtype:
         return "%s(%s)" % (class_name, lst)
-
-    # determine typename
-    if issubclass(arr.dtype.type, flexible):
-        if arr.dtype.names:
-            typename = "%s" % str(arr.dtype)
-        else:
-            typename = "'%s'" % str(arr.dtype)
-    else:
-        typename = arr.dtype.name
-        # quote typenames which can't be represented as python variable names
-        if typename and not (typename[0].isalpha() and typename.isalnum()):
-            typename = "'%s'" % typename
+    typename = dtype_short_repr(arr.dtype)
 
     prefix = "{}({},".format(class_name, lst)
     suffix = "dtype={})".format(typename)

diff --git a/numpy/ma/core.py b/numpy/ma/core.py
@@ -2438,7 +2438,7 @@ def _recursive_printoption(result, mask, printopt):
     return
 
 # For better or worse, these end in a newline
-_print_templates = dict(
+_legacy_print_templates = dict(
     long_std=textwrap.dedent("""\
         masked_%(name)s(data =
          %(data)s,
@@ -3881,23 +3881,77 @@ def __repr__(self):
         else:
             name = self._baseclass.__name__
 
-        is_long = self.ndim > 1
-        is_structured = bool(self.dtype.names)
 
-        parameters = dict(
-            name=name,
-            nlen=" " * len(name),
-            data=str(self),
-            mask=str(self._mask),
-            fill=str(self.fill_value),
-            dtype=str(self.dtype)
+        # 2017-11-19: Demoted to legacy format
+        if np.get_printoptions()['legacy'] == '1.13':
+            is_long = self.ndim > 1
+            parameters = dict(
+                name=name,
+                nlen=" " * len(name),
+                data=str(self),
+                mask=str(self._mask),
+                fill=str(self.fill_value),
+                dtype=str(self.dtype)
+            )
+            is_structured = bool(self.dtype.names)
+            key = '{}_{}'.format(
+                'long' if is_long else 'short',
+                'flx' if is_structured else 'std'
+            )
+            return _legacy_print_templates[key] % parameters
+
+        prefix = 'masked_{}('.format(name)
+
+        dtype_needed = (
+            not np.core.arrayprint.dtype_is_implied(self.dtype) or
+            np.all(self.mask) or
+            self.size == 0
         )
 
-        key = '{}_{}'.format(
-            'long' if is_long else 'short',
-            'flx' if is_structured else 'std'
+        # determine which keyword args need to be shown
+        keys = ['data', 'mask', 'fill_value']
+        if dtype_needed:
+            keys.append('dtype')
+
+        # array has only one row: every dimension but the last has length 1
+        is_one_row = builtins.all(dim == 1 for dim in self.shape[:-1])
+
+        # choose what to indent each keyword with
+        min_indent = 2
+        if is_one_row:
+            # first key on the same line as the type, remaining keys
+            # aligned by equals
+            indents = {}
+            indents[keys[0]] = prefix
+            for k in keys[1:]:
+                n = builtins.max(min_indent, len(prefix + keys[0]) - len(k))
+                indents[k] = ' ' * n
+            prefix = ''  # absorbed into the first indent
+        else:
+            # each key on its own line, indented by two spaces
+            indents = {k: ' ' * min_indent for k in keys}
+            prefix = prefix + '\n'  # first key on the next line
+
+        # format the field values
+        reprs = {}
+        reprs['data'] = np.array2string(
+            self._insert_masked_print(),
+            separator=", ",
+            prefix=indents['data'] + 'data=')
+        reprs['mask'] = np.array2string(
+            self._mask,
+            separator=", ",
+            prefix=indents['mask'] + 'mask=')
+        reprs['fill_value'] = repr(self.fill_value)
+        if dtype_needed:
+            reprs['dtype'] = np.core.arrayprint.dtype_short_repr(self.dtype)
+
+        # join keys with values and indentations
+        result = ',\n'.join(
+            '{}{}={}'.format(indents[k], k, reprs[k])
+            for k in keys
         )
-        return _print_templates[key] % parameters
+        return prefix + result + ')'
 
     def _delegate_binop(self, other):
         # This emulates the logic in

diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
@@ -14,6 +14,7 @@
 import operator
 import itertools
 import sys
+import textwrap
 from functools import reduce
 
 
@@ -487,19 +488,96 @@ def test_deepcopy(self):
     def test_str_repr(self):
         a = array([0, 1, 2], mask=[False, True, False])
         assert_equal(str(a), '[0 -- 2]')
-        assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n'
-                              '             mask = [False  True False],\n'
-                              '       fill_value = 999999)\n')
+        assert_equal(
+            repr(a),
+            textwrap.dedent('''\
+            masked_array(data=[0, --, 2],
+                         mask=[False,  True, False],
+                   fill_value=999999)''')
+        )
 
+        # arrays with a continuation
         a = np.ma.arange(2000)
         a[1:50] = np.ma.masked
         assert_equal(
             repr(a),
-            'masked_array(data = [0 -- -- ... 1997 1998 1999],\n'
-            '             mask = [False  True  True ... False False False],\n'
-            '       fill_value = 999999)\n'
+            textwrap.dedent('''\
+            masked_array(data=[0, --, --, ..., 1997, 1998, 1999],
+                         mask=[False,  True,  True, ..., False, False, False],
+                   fill_value=999999)''')
+        )
+
+        # line-wrapped 1d arrays are correctly aligned
+        a = np.ma.arange(20)
+        assert_equal(
+            repr(a),
+            textwrap.dedent('''\
+            masked_array(data=[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
+                               14, 15, 16, 17, 18, 19],
+                         mask=False,
+                   fill_value=999999)''')
+        )
+
+        # 2d arrays cause wrapping
+        a = array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)
+        a[1,1] = np.ma.masked
+        assert_equal(
+            repr(a),
+            textwrap.dedent('''\
+            masked_array(
+              data=[[1, 2, 3],
+                    [4, --, 6]],
+              mask=[[False, False, False],
+                    [False,  True, False]],
+              fill_value=999999,
+              dtype=int8)''')
         )
 
+        # but not if they're a row vector
+        assert_equal(
+            repr(a[:1]),
+            textwrap.dedent('''\
+            masked_array(data=[[1, 2, 3]],
+                         mask=[[False, False, False]],
+                   fill_value=999999,
+                        dtype=int8)''')
+        )
+
+        # dtype=int is implied, so not shown
+        assert_equal(
+            repr(a.astype(int)),
+            textwrap.dedent('''\
+            masked_array(
+              data=[[1, 2, 3],
+                    [4, --, 6]],
+              mask=[[False, False, False],
+                    [False,  True, False]],
+              fill_value=999999)''')
+        )
+
+
+
+    def test_str_repr_legacy(self):
+        oldopts = np.get_printoptions()
+        np.set_printoptions(legacy='1.13')
+        try:
+            a = array([0, 1, 2], mask=[False, True, False])
+            assert_equal(str(a), '[0 -- 2]')
+            assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n'
+                                  '             mask = [False  True False],\n'
+                                  '       fill_value = 999999)\n')
+
+            a = np.ma.arange(2000)
+            a[1:50] = np.ma.masked
+            assert_equal(
+                repr(a),
+                'masked_array(data = [0 -- -- ... 1997 1998 1999],\n'
+                '             mask = [False  True  True ... False False False],\n'
+                '       fill_value = 999999)\n'
+            )
+        finally:
+            np.set_printoptions(**oldopts)
+
     def test_0d_unicode(self):
         u = u'caf\xe9'
         utype = type(u)

diff --git a/numpy/ma/tests/test_subclassing.py b/numpy/ma/tests/test_subclassing.py
@@ -17,6 +17,9 @@
     )
 # from numpy.ma.core import (
 
+def assert_startswith(a, b):
+    # produces a better error message than assert_(a.startswith(b))
+    assert_equal(a[:len(b)], b)
 
 class SubArray(np.ndarray):
     # Defines a generic np.ndarray subclass, that stores some metadata
@@ -336,25 +339,25 @@ def test_subclass_repr(self):
         and 'array' for np.ndarray"""
         x = np.arange(5)
         mx = masked_array(x, mask=[True, False, True, False, False])
-        assert_(repr(mx).startswith('masked_array'))
+        assert_startswith(repr(mx), 'masked_array')
         xsub = SubArray(x)
         mxsub = masked_array(xsub, mask=[True, False, True, False, False])
-        assert_(repr(mxsub).startswith(
-            'masked_{0}(data = [-- 1 -- 3 4]'.format(SubArray.__name__)))
+        assert_startswith(repr(mxsub),
+            'masked_{0}(data=[--, 1, --, 3, 4]'.format(SubArray.__name__))
 
     def test_subclass_str(self):
         """test str with subclass that has overridden str, setitem"""
         # first without override
         x = np.arange(5)
         xsub = SubArray(x)
         mxsub = masked_array(xsub, mask=[True, False, True, False, False])
-        assert_(str(mxsub) == '[-- 1 -- 3 4]')
+        assert_equal(str(mxsub), '[-- 1 -- 3 4]')
 
         xcsub = ComplicatedSubArray(x)
         assert_raises(ValueError, xcsub.__setitem__, 0,
                       np.ma.core.masked_print_option)
         mxcsub = masked_array(xcsub, mask=[True, False, True, False, False])
-        assert_(str(mxcsub) == 'myprefix [-- 1 -- 3 4] mypostfix')
+        assert_equal(str(mxcsub), 'myprefix [-- 1 -- 3 4] mypostfix')
 
     def test_pure_subclass_info_preservation(self):
         # Test that ufuncs and methods conserve extra information consistently;