Skip to content

Refactor color parsing of Axes.scatter #11663

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
259 changes: 151 additions & 108 deletions lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools
import logging
import math
import operator
from numbers import Number
import warnings

Expand Down Expand Up @@ -4012,6 +4013,150 @@ def dopatch(xs, ys, **kwargs):
return dict(whiskers=whiskers, caps=caps, boxes=boxes,
medians=medians, fliers=fliers, means=means)

def _parse_scatter_color_args(self, c, edgecolors, kwargs, xshape, yshape):
"""
Helper function to process color related arguments of `.Axes.scatter`.

Argument precedence for facecolors:

- c (if not None)
- kwargs['facecolors']
- kwargs['facecolor']
- kwargs['color'] (==kwcolor)
- 'b' if in classic mode else next color from color cycle

Argument precedence for edgecolors:

- edgecolors (is an explicit kw argument in scatter())
- kwargs['edgecolor']
- kwargs['color'] (==kwcolor)
- 'face' if not in classic mode else None

Arguments
---------
c : color or sequence or sequence of color or None
See argument description of `.Axes.scatter`.
edgecolors : color or sequence of color or {'face', 'none'} or None
See argument description of `.Axes.scatter`.
kwargs : dict
Additional kwargs. If these keys exist, we pop and process them:
'facecolors', 'facecolor', 'edgecolor', 'color'
Note: The dict is modified by this function.
xshape, yshape : tuple of int
The shape of the x and y arrays passed to `.Axes.scatter`.

Returns
-------
c
The input *c* if it was not *None*, else some color specification
derived from the other inputs or defaults.
colors : array(N, 4) or None
The facecolors as RGBA values or *None* if a colormap is used.
edgecolors
The edgecolor specification.

"""
xsize = functools.reduce(operator.mul, xshape, 1)
ysize = functools.reduce(operator.mul, yshape, 1)

facecolors = kwargs.pop('facecolors', None)
facecolors = kwargs.pop('facecolor', facecolors)
edgecolors = kwargs.pop('edgecolor', edgecolors)

kwcolor = kwargs.pop('color', None)

if kwcolor is not None and c is not None:
raise ValueError("Supply a 'c' argument or a 'color'"
" kwarg but not both; they differ but"
" their functionalities overlap.")

if kwcolor is not None:
try:
mcolors.to_rgba_array(kwcolor)
except ValueError:
raise ValueError("'color' kwarg must be an mpl color"
" spec or sequence of color specs.\n"
"For a sequence of values to be color-mapped,"
" use the 'c' argument instead.")
if edgecolors is None:
edgecolors = kwcolor
if facecolors is None:
facecolors = kwcolor

if edgecolors is None and not rcParams['_internal.classic_mode']:
edgecolors = 'face'

c_was_none = c is None
if c is None:
c = (facecolors if facecolors is not None
else "b" if rcParams['_internal.classic_mode']
else self._get_patches_for_fill.get_next_color())

# After this block, c_array will be None unless
# c is an array for mapping. The potential ambiguity
# with a sequence of 3 or 4 numbers is resolved in
# favor of mapping, not rgb or rgba.
# Convenience vars to track shape mismatch *and* conversion failures.
valid_shape = True # will be put to the test!
n_elem = -1 # used only for (some) exceptions

if (c_was_none or
kwcolor is not None or
isinstance(c, str) or
(isinstance(c, collections.abc.Iterable) and
len(c) > 0 and
isinstance(cbook.safe_first_element(c), str))):
c_array = None
else:
try: # First, does 'c' look suitable for value-mapping?
c_array = np.asanyarray(c, dtype=float)
n_elem = c_array.shape[0]
if c_array.shape in [xshape, yshape]:
Copy link
Contributor

@anntzer anntzer Nov 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something funny is going on here (I know it was already there before, but still seems worth pointing out): we take x and y of any shapes and flatten them (they just need to have the same size), but c has to match either the shape of x or y, not just the size.

In other words the following "work" (i.e. perform an implicit ravel()):

scatter(np.arange(12).reshape((3, 4)), np.arange(12).reshape((4, 3)), c=np.arange(12).reshape((3, 4)))
scatter(np.arange(12).reshape((3, 4)), np.arange(12).reshape((4, 3)), c=np.arange(12).reshape((4, 3)))

but the following fail:

scatter(np.arange(12).reshape((3, 4)), np.arange(12).reshape((4, 3)), c=np.arange(12).reshape((6, 2)))
# and even
scatter(np.arange(12).reshape((3, 4)), np.arange(12).reshape((4, 3)), c=np.arange(12))

Of course that last one has the best error message (irony intended):

ValueError: 'c' argument has 12 elements, which is not acceptable for use with 'x' with size 12, 'y' with size 12.

Copy link
Member Author

@timhoffm timhoffm Nov 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well observed. I prefer not to change functionality as part of this refactoring, but I'm happy to open an issue/pr for that. - Also not sure what the intended behavior should be (should we modify c to be as permissive as with x/y, or should we be more strict about x/y shape?

Edit: Issue added #12735

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ValueError: 'c' argument has 12 elements, which is not acceptable for use with 'x' with size 12, 'y' with size 12.

is a really awful error. Has the intuitive functionality that was present before been restored?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please continue this conversation at #12735. I think a PR that gave a better error for this case would be most welcome.

c = np.ma.ravel(c_array)
else:
if c_array.shape in ((3,), (4,)):
_log.warning(
"'c' argument looks like a single numeric RGB or "
"RGBA sequence, which should be avoided as value-"
"mapping will have precedence in case its length "
"matches with 'x' & 'y'. Please use a 2-D array "
"with a single row if you really want to specify "
"the same RGB or RGBA value for all points.")
# Wrong size; it must not be intended for mapping.
valid_shape = False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I have a mild preference for moving the invalid shape error message up and just do raise ValueError(invalid_shape.format(...)) here and below instead of keeping the valid_shape and n_elem tracking variables.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that the overall structure is not very clear and that it can be written much more concise, but that's beyond this PR. Therefore I'd like to defer any further code changes within the function. As written in the original PR message:

I'm quite sure this function can be untangled more. This will become easier after this PR, because we can now add a further tests.

So the strategy is:

  1. Get this PR in without changing more than neccesary of the original code.
  2. Move the tests to directly testing Axes._parse_scatter_color_args, which is significantly faster than creating a figure each time.
  3. This allows adding more tests an thus be surer that code changes do not modify the functionality.

c_array = None
except ValueError:
# Failed to make a floating-point array; c must be color specs.
c_array = None
if c_array is None:
try: # Then is 'c' acceptable as PathCollection facecolors?
colors = mcolors.to_rgba_array(c)
n_elem = colors.shape[0]
if colors.shape[0] not in (0, 1, xsize, ysize):
# NB: remember that a single color is also acceptable.
# Besides *colors* will be an empty array if c == 'none'.
valid_shape = False
raise ValueError
except ValueError:
if not valid_shape: # but at least one conversion succeeded.
raise ValueError(
"'c' argument has {nc} elements, which is not "
"acceptable for use with 'x' with size {xs}, "
"'y' with size {ys}."
.format(nc=n_elem, xs=xsize, ys=ysize)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of tracking n_elem I think you can just do nc=np.shape(c)[0] here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can have edge cases in which np.shape(c)[0] will raise here. E.g.

plt.scatter([1,2], [1,2], c='rgb')

...
ValueError:

During handling of the above exception, another exception occurred:

~/dev/matplotlib/lib/matplotlib/axes/_axes.py in _parse_scatter_color_args(self, c, edgecolors, kwargs, xshape, yshape)
   4144                         "acceptable for use with 'x' with size {xs}, "
   4145                         "'y' with size {ys}."
-> 4146                             .format(nc=np.shape(c)[0], xs=xsize, ys=ysize)
   4147                     )
   4148                 else:

IndexError: tuple index out of range

Makes shure the intended message is printed, even if we cannot determine the shape of c. Even if it says -1 there it's better than just "tuple index out of range".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough

)
else:
# Both the mapping *and* the RGBA conversion failed: pretty
# severe failure => one may appreciate a verbose feedback.
raise ValueError(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At that point the only error we can get is the one thrown by to_rgba_array which I think is just fine to let through (Invalid RGBA argument: ...).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above would like to defer this to future PRs.

"'c' argument must be a mpl color, a sequence of mpl "
"colors or a sequence of numbers, not {}."
.format(c) # note: could be long depending on c
)
else:
colors = None # use cmap, norm after collection is created
return c, colors, edgecolors

@_preprocess_data(replace_names=["x", "y", "s", "linewidths",
"edgecolors", "c", "facecolor",
"facecolors", "color"],
Expand Down Expand Up @@ -4125,129 +4270,27 @@ def scatter(self, x, y, s=None, c=None, marker=None, cmap=None, norm=None,

"""
# Process **kwargs to handle aliases, conflicts with explicit kwargs:
facecolors = None
edgecolors = kwargs.pop('edgecolor', edgecolors)
fc = kwargs.pop('facecolors', None)
fc = kwargs.pop('facecolor', fc)
if fc is not None:
facecolors = fc
co = kwargs.pop('color', None)
if co is not None:
try:
mcolors.to_rgba_array(co)
except ValueError:
raise ValueError("'color' kwarg must be an mpl color"
" spec or sequence of color specs.\n"
"For a sequence of values to be color-mapped,"
" use the 'c' argument instead.")
if edgecolors is None:
edgecolors = co
if facecolors is None:
facecolors = co
if c is not None:
raise ValueError("Supply a 'c' argument or a 'color'"
" kwarg but not both; they differ but"
" their functionalities overlap.")
if c is None:
if facecolors is not None:
c = facecolors
else:
if rcParams['_internal.classic_mode']:
c = 'b' # The original default
else:
c = self._get_patches_for_fill.get_next_color()
c_none = True
else:
c_none = False

if edgecolors is None and not rcParams['_internal.classic_mode']:
edgecolors = 'face'

self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
x = self.convert_xunits(x)
y = self.convert_yunits(y)

# np.ma.ravel yields an ndarray, not a masked array,
# unless its argument is a masked array.
xy_shape = (np.shape(x), np.shape(y))
xshape, yshape = np.shape(x), np.shape(y)
x = np.ma.ravel(x)
y = np.ma.ravel(y)
if x.size != y.size:
raise ValueError("x and y must be the same size")

if s is None:
if rcParams['_internal.classic_mode']:
s = 20
else:
s = rcParams['lines.markersize'] ** 2.0

s = (20 if rcParams['_internal.classic_mode'] else
rcParams['lines.markersize'] ** 2.0)
s = np.ma.ravel(s) # This doesn't have to match x, y in size.

# After this block, c_array will be None unless
# c is an array for mapping. The potential ambiguity
# with a sequence of 3 or 4 numbers is resolved in
# favor of mapping, not rgb or rgba.

# Convenience vars to track shape mismatch *and* conversion failures.
valid_shape = True # will be put to the test!
n_elem = -1 # used only for (some) exceptions

if (c_none or
co is not None or
isinstance(c, str) or
(isinstance(c, collections.Iterable) and
len(c) > 0 and
isinstance(cbook.safe_first_element(c), str))):
c_array = None
else:
try: # First, does 'c' look suitable for value-mapping?
c_array = np.asanyarray(c, dtype=float)
n_elem = c_array.shape[0]
if c_array.shape in xy_shape:
c = np.ma.ravel(c_array)
else:
if c_array.shape in ((3,), (4,)):
_log.warning(
"'c' argument looks like a single numeric RGB or "
"RGBA sequence, which should be avoided as value-"
"mapping will have precedence in case its length "
"matches with 'x' & 'y'. Please use a 2-D array "
"with a single row if you really want to specify "
"the same RGB or RGBA value for all points.")
# Wrong size; it must not be intended for mapping.
valid_shape = False
c_array = None
except ValueError:
# Failed to make a floating-point array; c must be color specs.
c_array = None

if c_array is None:
try: # Then is 'c' acceptable as PathCollection facecolors?
colors = mcolors.to_rgba_array(c)
n_elem = colors.shape[0]
if colors.shape[0] not in (0, 1, x.size, y.size):
# NB: remember that a single color is also acceptable.
# Besides *colors* will be an empty array if c == 'none'.
valid_shape = False
raise ValueError
except ValueError:
if not valid_shape: # but at least one conversion succeeded.
raise ValueError(
"'c' argument has {nc} elements, which is not "
"acceptable for use with 'x' with size {xs}, "
"'y' with size {ys}."
.format(nc=n_elem, xs=x.size, ys=y.size)
)
# Both the mapping *and* the RGBA conversion failed: pretty
# severe failure => one may appreciate a verbose feedback.
raise ValueError(
"'c' argument must either be valid as mpl color(s) "
"or as numbers to be mapped to colors. "
"Here c = {}." # <- beware, could be long depending on c.
.format(c)
)
else:
colors = None # use cmap, norm after collection is created
c, colors, edgecolors = \
self._parse_scatter_color_args(c, edgecolors, kwargs,
xshape, yshape)

# `delete_masked_points` only modifies arguments of the same length as
# `x`.
Expand Down
62 changes: 61 additions & 1 deletion lib/matplotlib/tests/test_axes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import namedtuple
from itertools import product
from distutils.version import LooseVersion
import io
Expand Down Expand Up @@ -1795,7 +1796,7 @@ def test_scatter_c(self, c_case, re_key):
# Additional checking of *c* (introduced in #11383).
REGEXP = {
"shape": "^'c' argument has [0-9]+ elements", # shape mismatch
"conversion": "^'c' argument must either be valid", # bad vals
"conversion": "^'c' argument must be a mpl color", # bad vals
}
x = y = [0, 1, 2, 3]
fig, ax = plt.subplots()
Expand All @@ -1807,6 +1808,65 @@ def test_scatter_c(self, c_case, re_key):
ax.scatter(x, y, c=c_case, edgecolors="black")


def _params(c=None, xshape=(2,), yshape=(2,), **kwargs):
edgecolors = kwargs.pop('edgecolors', None)
return (c, edgecolors, kwargs if kwargs is not None else {},
xshape, yshape)
_result = namedtuple('_result', 'c, colors')


@pytest.mark.parametrize('params, expected_result',
[(_params(),
_result(c='b', colors=np.array([[0, 0, 1, 1]]))),
(_params(c='r'),
_result(c='r', colors=np.array([[1, 0, 0, 1]]))),
(_params(c='r', colors='b'),
_result(c='r', colors=np.array([[1, 0, 0, 1]]))),
# color
(_params(color='b'),
_result(c='b', colors=np.array([[0, 0, 1, 1]]))),
(_params(color=['b', 'g']),
_result(c=['b', 'g'], colors=np.array([[0, 0, 1, 1], [0, .5, 0, 1]]))),
])
def test_parse_scatter_color_args(params, expected_result):
from matplotlib.axes import Axes
dummyself = 'UNUSED' # self is only used in one case, which we do not
# test. Therefore we can get away without costly
# creating an Axes instance.
c, colors, _edgecolors = Axes._parse_scatter_color_args(dummyself, *params)
assert c == expected_result.c
assert_allclose(colors, expected_result.colors)

del _params
del _result


@pytest.mark.parametrize('kwargs, expected_edgecolors',
[(dict(), None),
(dict(c='b'), None),
(dict(edgecolors='r'), 'r'),
(dict(edgecolors=['r', 'g']), ['r', 'g']),
(dict(edgecolor='r'), 'r'),
(dict(edgecolors='face'), 'face'),
(dict(edgecolors='none'), 'none'),
(dict(edgecolor='r', edgecolors='g'), 'r'),
(dict(c='b', edgecolor='r', edgecolors='g'), 'r'),
(dict(color='r'), 'r'),
(dict(color='r', edgecolor='g'), 'g'),
])
def test_parse_scatter_color_args_edgecolors(kwargs, expected_edgecolors):
from matplotlib.axes import Axes
dummyself = 'UNUSED' # self is only used in one case, which we do not
# test. Therefore we can get away without costly
# creating an Axes instance.
c = kwargs.pop('c', None)
edgecolors = kwargs.pop('edgecolors', None)
_, _, result_edgecolors = \
Axes._parse_scatter_color_args(dummyself, c, edgecolors, kwargs,
xshape=(2,), yshape=(2,))
assert result_edgecolors == expected_edgecolors


def test_as_mpl_axes_api():
# tests the _as_mpl_axes api
from matplotlib.projections.polar import PolarAxes
Expand Down