From dc0b8bdf5aaf03a9698182a7db6deffd830a81e0 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 19 Nov 2017 01:10:23 -0800 Subject: [PATCH 1/3] TST: Improve assertion error messages --- numpy/ma/tests/test_subclassing.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/numpy/ma/tests/test_subclassing.py b/numpy/ma/tests/test_subclassing.py index e59dd4656bd2..5d71427cafbe 100644 --- a/numpy/ma/tests/test_subclassing.py +++ b/numpy/ma/tests/test_subclassing.py @@ -17,6 +17,9 @@ ) # from numpy.ma.core import ( +def assert_startswith(a, b): + # produces a better error message than assert_(a.startswith(b)) + assert_equal(a[:len(b)], b) class SubArray(np.ndarray): # Defines a generic np.ndarray subclass, that stores some metadata @@ -336,11 +339,11 @@ def test_subclass_repr(self): and 'array' for np.ndarray""" x = np.arange(5) mx = masked_array(x, mask=[True, False, True, False, False]) - assert_(repr(mx).startswith('masked_array')) + assert_startswith(repr(mx), 'masked_array') xsub = SubArray(x) mxsub = masked_array(xsub, mask=[True, False, True, False, False]) - assert_(repr(mxsub).startswith( - 'masked_{0}(data = [-- 1 -- 3 4]'.format(SubArray.__name__))) + assert_startswith(repr(mxsub), + 'masked_{0}(data=[-- 1 -- 3 4]'.format(SubArray.__name__)) def test_subclass_str(self): """test str with subclass that has overridden str, setitem""" @@ -348,13 +351,13 @@ def test_subclass_str(self): x = np.arange(5) xsub = SubArray(x) mxsub = masked_array(xsub, mask=[True, False, True, False, False]) - assert_(str(mxsub) == '[-- 1 -- 3 4]') + assert_equal(str(mxsub), '[-- 1 -- 3 4]') xcsub = ComplicatedSubArray(x) assert_raises(ValueError, xcsub.__setitem__, 0, np.ma.core.masked_print_option) mxcsub = masked_array(xcsub, mask=[True, False, True, False, False]) - assert_(str(mxcsub) == 'myprefix [-- 1 -- 3 4] mypostfix') + assert_equal(str(mxcsub), 'myprefix [-- 1 -- 3 4] mypostfix') def test_pure_subclass_info_preservation(self): # Test that 
ufuncs and methods conserve extra information consistently; From 18f5436641ef0a288639d81d53144706ebcd7cd3 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 19 Nov 2017 01:11:05 -0800 Subject: [PATCH 2/3] MAINT: Extract dtype printing code into helper function --- numpy/core/arrayprint.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index 95c770972710..d57d7636869f 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -1182,6 +1182,30 @@ def dtype_is_implied(dtype): return dtype.type in _typelessdata +def dtype_short_repr(dtype): + """ + Convert a dtype to a short form which evaluates to the same dtype. + + The intent is roughly that the following holds + + >>> from numpy import * + >>> assert eval(dtype_short_repr(dt)) == dt + """ + # handle these separately so they don't give garbage like str256 + if issubclass(dtype.type, flexible): + if dtype.names: + return "%s" % str(dtype) + else: + return "'%s'" % str(dtype) + + typename = dtype.name + # quote typenames which can't be represented as python variable names + if typename and not (typename[0].isalpha() and typename.isalnum()): + typename = repr(typename) + + return typename + + def array_repr(arr, max_line_width=None, precision=None, suppress_small=None): """ Return the string representation of an array. 
@@ -1245,18 +1269,7 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None): if skipdtype: return "%s(%s)" % (class_name, lst) - - # determine typename - if issubclass(arr.dtype.type, flexible): - if arr.dtype.names: - typename = "%s" % str(arr.dtype) - else: - typename = "'%s'" % str(arr.dtype) - else: - typename = arr.dtype.name - # quote typenames which can't be represented as python variable names - if typename and not (typename[0].isalpha() and typename.isalnum()): - typename = "'%s'" % typename + typename = dtype_short_repr(arr.dtype) prefix = "{}({},".format(class_name, lst) suffix = "dtype={})".format(typename) From 8bac6eed123c68560ffdb5d7f7093ad4015a9d85 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 19 Nov 2017 01:25:10 -0800 Subject: [PATCH 3/3] ENH: Improve MaskedArray.__repr__ * Commas are now used within data and mask * dtypes are shown for float32, int8 etc, where they previously weren't * Wrapped fields are now correctly indented * Spaces removed around = to match PEP8 --- doc/release/1.14.0-notes.rst | 7 +++ numpy/ma/core.py | 82 ++++++++++++++++++++++----- numpy/ma/tests/test_core.py | 90 ++++++++++++++++++++++++++++-- numpy/ma/tests/test_subclassing.py | 2 +- 4 files changed, 160 insertions(+), 21 deletions(-) diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index edffcc0a4c76..cdc50cc4f48b 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -503,3 +503,10 @@ Seeding ``RandomState`` using an array requires a 1-d array ``RandomState`` previously would accept empty arrays or arrays with 2 or more dimensions, which resulted in either a failure to seed (empty arrays) or for some of the passed values to be ignored when setting the seed. 
+ +``MaskedArray`` objects show a more useful ``repr`` +--------------------------------------------------- +The ``repr`` of a ``MaskedArray`` is now closer to the Python code that would +produce it, with arrays now being shown with commas and dtypes. Like the other +formatting changes, this can be disabled with the 1.13 legacy printing mode in +order to help transition doctests. diff --git a/numpy/ma/core.py b/numpy/ma/core.py index a10d354ee083..a09ec6bdb666 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -2438,7 +2438,7 @@ def _recursive_printoption(result, mask, printopt): return # For better or worse, these end in a newline -_print_templates = dict( +_legacy_print_templates = dict( long_std=textwrap.dedent("""\ masked_%(name)s(data = %(data)s, @@ -3881,23 +3881,77 @@ def __repr__(self): else: name = self._baseclass.__name__ - is_long = self.ndim > 1 - is_structured = bool(self.dtype.names) - parameters = dict( - name=name, - nlen=" " * len(name), - data=str(self), - mask=str(self._mask), - fill=str(self.fill_value), - dtype=str(self.dtype) + # 2017-11-19: Demoted to legacy format + if np.get_printoptions()['legacy'] == '1.13': + is_long = self.ndim > 1 + parameters = dict( + name=name, + nlen=" " * len(name), + data=str(self), + mask=str(self._mask), + fill=str(self.fill_value), + dtype=str(self.dtype) + ) + is_structured = bool(self.dtype.names) + key = '{}_{}'.format( + 'long' if is_long else 'short', + 'flx' if is_structured else 'std' + ) + return _legacy_print_templates[key] % parameters + + prefix = 'masked_{}('.format(name) + + dtype_needed = ( + not np.core.arrayprint.dtype_is_implied(self.dtype) or + np.all(self.mask) or + self.size == 0 ) - key = '{}_{}'.format( - 'long' if is_long else 'short', - 'flx' if is_structured else 'std' + # determine which keyword args need to be shown + keys = ['data', 'mask', 'fill_value'] + if dtype_needed: + keys.append('dtype') + + # array has only one row (non-column) + is_one_row = builtins.all(dim == 1 for 
dim in self.shape[:-1]) + + # choose what to indent each keyword with + min_indent = 2 + if is_one_row: + # first key on the same line as the type, remaining keys + # aligned by equals + indents = {} + indents[keys[0]] = prefix + for k in keys[1:]: + n = builtins.max(min_indent, len(prefix + keys[0]) - len(k)) + indents[k] = ' ' * n + prefix = '' # absorbed into the first indent + else: + # each key on its own line, indented by two spaces + indents = {k: ' ' * min_indent for k in keys} + prefix = prefix + '\n' # first key on the next line + + # format the field values + reprs = {} + reprs['data'] = np.array2string( + self._insert_masked_print(), + separator=", ", + prefix=indents['data'] + 'data=') + reprs['mask'] = np.array2string( + self._mask, + separator=", ", + prefix=indents['mask'] + 'mask=') + reprs['fill_value'] = repr(self.fill_value) + if dtype_needed: + reprs['dtype'] = np.core.arrayprint.dtype_short_repr(self.dtype) + + # join keys with values and indentations + result = ',\n'.join( + '{}{}={}'.format(indents[k], k, reprs[k]) + for k in keys ) - return _print_templates[key] % parameters + return prefix + result + ')' def _delegate_binop(self, other): # This emulates the logic in diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 03de71f81d8c..be56833fdf0a 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -14,6 +14,7 @@ import operator import itertools import sys +import textwrap from functools import reduce @@ -487,19 +488,96 @@ def test_deepcopy(self): def test_str_repr(self): a = array([0, 1, 2], mask=[False, True, False]) assert_equal(str(a), '[0 -- 2]') - assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n' - ' mask = [False True False],\n' - ' fill_value = 999999)\n') + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[0, --, 2], + mask=[False, True, False], + fill_value=999999)''') + ) + # arrays with a continuation a = np.ma.arange(2000) a[1:50] = np.ma.masked 
assert_equal( repr(a), - 'masked_array(data = [0 -- -- ... 1997 1998 1999],\n' - ' mask = [False True True ... False False False],\n' - ' fill_value = 999999)\n' + textwrap.dedent('''\ + masked_array(data=[0, --, --, ..., 1997, 1998, 1999], + mask=[False, True, True, ..., False, False, False], + fill_value=999999)''') + ) + + # line-wrapped 1d arrays are correctly aligned + a = np.ma.arange(20) + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array(data=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19], + mask=False, + fill_value=999999)''') + ) + + # 2d arrays cause wrapping + a = array([[1, 2, 3], [4, 5, 6]], dtype=np.int8) + a[1,1] = np.ma.masked + assert_equal( + repr(a), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999, + dtype=int8)''') ) + # but not if they're a row vector + assert_equal( + repr(a[:1]), + textwrap.dedent('''\ + masked_array(data=[[1, 2, 3]], + mask=[[False, False, False]], + fill_value=999999, + dtype=int8)''') + ) + + # dtype=int is implied, so not shown + assert_equal( + repr(a.astype(int)), + textwrap.dedent('''\ + masked_array( + data=[[1, 2, 3], + [4, --, 6]], + mask=[[False, False, False], + [False, True, False]], + fill_value=999999)''') + ) + + + + def test_str_repr_legacy(self): + oldopts = np.get_printoptions() + np.set_printoptions(legacy='1.13') + try: + a = array([0, 1, 2], mask=[False, True, False]) + assert_equal(str(a), '[0 -- 2]') + assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n' + ' mask = [False True False],\n' + ' fill_value = 999999)\n') + + a = np.ma.arange(2000) + a[1:50] = np.ma.masked + assert_equal( + repr(a), + 'masked_array(data = [0 -- -- ... 1997 1998 1999],\n' + ' mask = [False True True ... 
False False False],\n' + ' fill_value = 999999)\n' + ) + finally: + np.set_printoptions(**oldopts) + def test_0d_unicode(self): u = u'caf\xe9' utype = type(u) diff --git a/numpy/ma/tests/test_subclassing.py b/numpy/ma/tests/test_subclassing.py index 5d71427cafbe..a54574690e50 100644 --- a/numpy/ma/tests/test_subclassing.py +++ b/numpy/ma/tests/test_subclassing.py @@ -343,7 +343,7 @@ def test_subclass_repr(self): xsub = SubArray(x) mxsub = masked_array(xsub, mask=[True, False, True, False, False]) assert_startswith(repr(mxsub), - 'masked_{0}(data=[-- 1 -- 3 4]'.format(SubArray.__name__)) + 'masked_{0}(data=[--, 1, --, 3, 4]'.format(SubArray.__name__)) def test_subclass_str(self): """test str with subclass that has overridden str, setitem"""