Skip to content

ENH: Various improvements to Maskedarray repr #9792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/release/1.14.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -503,3 +503,10 @@ Seeding ``RandomState`` using an array requires a 1-d array
``RandomState`` previously would accept empty arrays or arrays with 2 or more
dimensions, which resulted in either a failure to seed (empty arrays) or for
some of the passed values to be ignored when setting the seed.

``MaskedArray`` objects show a more useful ``repr``
---------------------------------------------------
The ``repr`` of a ``MaskedArray`` is now closer to the python code that would
produce it, with arrays now being shown with commas and dtypes. Like the other
formatting changes, this can be disabled with the 1.13 legacy printing mode in
order to help transition doctests.
37 changes: 25 additions & 12 deletions numpy/core/arrayprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,30 @@ def dtype_is_implied(dtype):
return dtype.type in _typelessdata


def dtype_short_repr(dtype):
Copy link
Member Author

@eric-wieser eric-wieser Nov 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like it might belong at dtype.__format__ in future, but for now I just wanted to avoid code duplication

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is also duplicated from arrayprint.py; I was just modifying it in #10032. Maybe I can rebase that on this PR and we can move the function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused - this file is arrayprint - where is the other duplicate?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want me to split off a PR with just the first two commits, which we can merge first?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry I thought this was ma/core.py. This change is fine.

"""
Convert a dtype to a short form which evaluates to the same dtype.

The intent is roughly that the following holds

>>> from numpy import *
>>> assert eval(dtype_short_repr(dt)) == dt
"""
# handle these separately so they don't give garbage like str256
if issubclass(dtype.type, flexible):
if dtype.names:
return "%s" % str(dtype)
else:
return "'%s'" % str(dtype)

typename = dtype.name
# quote typenames which can't be represented as python variable names
if typename and not (typename[0].isalpha() and typename.isalnum()):
typename = repr(typename)

return typename


def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
"""
Return the string representation of an array.
Expand Down Expand Up @@ -1245,18 +1269,7 @@ def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):

if skipdtype:
return "%s(%s)" % (class_name, lst)

# determine typename
if issubclass(arr.dtype.type, flexible):
if arr.dtype.names:
typename = "%s" % str(arr.dtype)
else:
typename = "'%s'" % str(arr.dtype)
else:
typename = arr.dtype.name
# quote typenames which can't be represented as python variable names
if typename and not (typename[0].isalpha() and typename.isalnum()):
typename = "'%s'" % typename
typename = dtype_short_repr(arr.dtype)

prefix = "{}({},".format(class_name, lst)
suffix = "dtype={})".format(typename)
Expand Down
82 changes: 68 additions & 14 deletions numpy/ma/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2438,7 +2438,7 @@ def _recursive_printoption(result, mask, printopt):
return

# For better or worse, these end in a newline
_print_templates = dict(
_legacy_print_templates = dict(
long_std=textwrap.dedent("""\
masked_%(name)s(data =
%(data)s,
Expand Down Expand Up @@ -3881,23 +3881,77 @@ def __repr__(self):
else:
name = self._baseclass.__name__

is_long = self.ndim > 1
is_structured = bool(self.dtype.names)

parameters = dict(
name=name,
nlen=" " * len(name),
data=str(self),
mask=str(self._mask),
fill=str(self.fill_value),
dtype=str(self.dtype)
# 2017-11-19: Demoted to legacy format
if np.get_printoptions()['legacy'] == '1.13':
is_long = self.ndim > 1
parameters = dict(
name=name,
nlen=" " * len(name),
data=str(self),
mask=str(self._mask),
fill=str(self.fill_value),
dtype=str(self.dtype)
)
is_structured = bool(self.dtype.names)
key = '{}_{}'.format(
'long' if is_long else 'short',
'flx' if is_structured else 'std'
)
return _legacy_print_templates[key] % parameters

prefix = 'masked_{}('.format(name)

dtype_needed = (
not np.core.arrayprint.dtype_is_implied(self.dtype) or
np.all(self.mask) or
self.size == 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plain ndarrays also have a special case for size-0, more than 1 dimension:

>>> np.empty((0,1))
array([], shape=(0, 1), dtype=float64)

But I'm a bit ambivalent about that behavior since shape isn't a valid array keyword. But I don't have a better way of signalling that the shape is >1d.

Compare to np.ma.empty((0,1)).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked, that behavior goes back to before numpy existed!

Copy link
Member Author

@eric-wieser eric-wieser Nov 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An idea: Maybe print np.empty((0, 1)) as empty((0, 1), dtype=float64)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's pretty neat. I'm on board.

I'll try it out as an extra commit in the newline PR.

Copy link
Member Author

@eric-wieser eric-wieser Nov 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't - I want to get the newline PR in as is so that I can rebase this and dtype_short_repr

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or actually, we can try in a new PR to avoid cluttering things.

Copy link
Member

@ahaldane ahaldane Nov 20, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh ok I didn't see your reply. Agreed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can always create a new PR with common commits with the old PR - if you do that, and the old PR is merged, then you can avoid having to rebase by switching the "base branch" twice in the github UI.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect that some rebasing will be needed anyway since you'll touch the typename code... we'll see.

)

key = '{}_{}'.format(
'long' if is_long else 'short',
'flx' if is_structured else 'std'
# determine which keyword args need to be shown
keys = ['data', 'mask', 'fill_value']
if dtype_needed:
keys.append('dtype')

# array has only one row (non-column)
is_one_row = builtins.all(dim == 1 for dim in self.shape[:-1])

# choose what to indent each keyword with
min_indent = 2
if is_one_row:
# first key on the same line as the type, remaining keys
# aligned by equals
indents = {}
indents[keys[0]] = prefix
for k in keys[1:]:
n = builtins.max(min_indent, len(prefix + keys[0]) - len(k))
indents[k] = ' ' * n
prefix = '' # absorbed into the first indent
else:
# each key on its own line, indented by two spaces
indents = {k: ' ' * min_indent for k in keys}
prefix = prefix + '\n' # first key on the next line

# format the field values
reprs = {}
reprs['data'] = np.array2string(
self._insert_masked_print(),
separator=", ",
prefix=indents['data'] + 'data=')
reprs['mask'] = np.array2string(
self._mask,
separator=", ",
prefix=indents['mask'] + 'mask=')
reprs['fill_value'] = repr(self.fill_value)
if dtype_needed:
reprs['dtype'] = np.core.arrayprint.dtype_short_repr(self.dtype)

# join keys with values and indentations
result = ',\n'.join(
'{}{}={}'.format(indents[k], k, reprs[k])
for k in keys
)
return _print_templates[key] % parameters
return prefix + result + ')'

def _delegate_binop(self, other):
# This emulates the logic in
Expand Down
90 changes: 84 additions & 6 deletions numpy/ma/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import operator
import itertools
import sys
import textwrap
from functools import reduce


Expand Down Expand Up @@ -487,19 +488,96 @@ def test_deepcopy(self):
def test_str_repr(self):
a = array([0, 1, 2], mask=[False, True, False])
assert_equal(str(a), '[0 -- 2]')
assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n'
' mask = [False True False],\n'
' fill_value = 999999)\n')
assert_equal(
repr(a),
textwrap.dedent('''\
masked_array(data=[0, --, 2],
mask=[False, True, False],
fill_value=999999)''')
)

# arrays with a continuation
a = np.ma.arange(2000)
a[1:50] = np.ma.masked
assert_equal(
repr(a),
'masked_array(data = [0 -- -- ... 1997 1998 1999],\n'
' mask = [False True True ... False False False],\n'
' fill_value = 999999)\n'
textwrap.dedent('''\
masked_array(data=[0, --, --, ..., 1997, 1998, 1999],
mask=[False, True, True, ..., False, False, False],
fill_value=999999)''')
)

# line-wrapped 1d arrays are correctly aligned
a = np.ma.arange(20)
assert_equal(
repr(a),
textwrap.dedent('''\
masked_array(data=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19],
mask=False,
fill_value=999999)''')
)

# 2d arrays cause wrapping
a = array([[1, 2, 3], [4, 5, 6]], dtype=np.int8)
a[1,1] = np.ma.masked
assert_equal(
repr(a),
textwrap.dedent('''\
masked_array(
data=[[1, 2, 3],
[4, --, 6]],
mask=[[False, False, False],
[False, True, False]],
fill_value=999999,
dtype=int8)''')
)

# but not if they're a row vector
assert_equal(
repr(a[:1]),
textwrap.dedent('''\
masked_array(data=[[1, 2, 3]],
mask=[[False, False, False]],
fill_value=999999,
dtype=int8)''')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Latest revision changed this case.

)

# dtype=int is implied, so not shown
assert_equal(
repr(a.astype(int)),
textwrap.dedent('''\
masked_array(
data=[[1, 2, 3],
[4, --, 6]],
mask=[[False, False, False],
[False, True, False]],
fill_value=999999)''')
)



def test_str_repr_legacy(self):
oldopts = np.get_printoptions()
np.set_printoptions(legacy='1.13')
try:
a = array([0, 1, 2], mask=[False, True, False])
assert_equal(str(a), '[0 -- 2]')
assert_equal(repr(a), 'masked_array(data = [0 -- 2],\n'
' mask = [False True False],\n'
' fill_value = 999999)\n')

a = np.ma.arange(2000)
a[1:50] = np.ma.masked
assert_equal(
repr(a),
'masked_array(data = [0 -- -- ... 1997 1998 1999],\n'
' mask = [False True True ... False False False],\n'
' fill_value = 999999)\n'
)
finally:
np.set_printoptions(**oldopts)

def test_0d_unicode(self):
u = u'caf\xe9'
utype = type(u)
Expand Down
13 changes: 8 additions & 5 deletions numpy/ma/tests/test_subclassing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
)
# from numpy.ma.core import (

def assert_startswith(a, b):
    """Assert that `a` begins with `b`.

    Only the leading ``len(b)`` items of `a` are compared against `b`,
    using ``assert_equal``; this gives a better error message than
    ``assert_(a.startswith(b))``.
    """
    leading = a[:len(b)]
    assert_equal(leading, b)

class SubArray(np.ndarray):
# Defines a generic np.ndarray subclass, that stores some metadata
Expand Down Expand Up @@ -336,25 +339,25 @@ def test_subclass_repr(self):
and 'array' for np.ndarray"""
x = np.arange(5)
mx = masked_array(x, mask=[True, False, True, False, False])
assert_(repr(mx).startswith('masked_array'))
assert_startswith(repr(mx), 'masked_array')
xsub = SubArray(x)
mxsub = masked_array(xsub, mask=[True, False, True, False, False])
assert_(repr(mxsub).startswith(
'masked_{0}(data = [-- 1 -- 3 4]'.format(SubArray.__name__)))
assert_startswith(repr(mxsub),
'masked_{0}(data=[--, 1, --, 3, 4]'.format(SubArray.__name__))

def test_subclass_str(self):
"""test str with subclass that has overridden str, setitem"""
# first without override
x = np.arange(5)
xsub = SubArray(x)
mxsub = masked_array(xsub, mask=[True, False, True, False, False])
assert_(str(mxsub) == '[-- 1 -- 3 4]')
assert_equal(str(mxsub), '[-- 1 -- 3 4]')

xcsub = ComplicatedSubArray(x)
assert_raises(ValueError, xcsub.__setitem__, 0,
np.ma.core.masked_print_option)
mxcsub = masked_array(xcsub, mask=[True, False, True, False, False])
assert_(str(mxcsub) == 'myprefix [-- 1 -- 3 4] mypostfix')
assert_equal(str(mxcsub), 'myprefix [-- 1 -- 3 4] mypostfix')

def test_pure_subclass_info_preservation(self):
# Test that ufuncs and methods conserve extra information consistently;
Expand Down