Skip to content

FIX: if bins input to hist is str, treat like no bins #8638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 53 additions & 12 deletions lib/matplotlib/axes/_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,30 @@
from matplotlib.axes._base import _AxesBase, _process_plot_format
from matplotlib.axes._secondary_axes import SecondaryAxis

try:
    # ``histogram_bin_edges`` is public API on the top-level ``numpy``
    # namespace (new in numpy 1.15).  Import it from there rather than from
    # the ``numpy.lib.histograms`` submodule, whose location is an
    # implementation detail and is not guaranteed to exist in every release.
    from numpy import histogram_bin_edges
except ImportError:
    def histogram_bin_edges(arr, bins, range=None, weights=None):
        """
        Minimal fallback for `numpy.histogram_bin_edges` on old numpy.

        Parameters
        ----------
        arr : array-like
            The data the bin edges are computed for.
        bins : int, str, 1D sequence or None
            A 1D sequence is taken to be the edges and returned unchanged.
            A string names a bin-width estimator understood by
            `numpy.histogram`.  An int is the number of equal-width bins;
            ``None`` means numpy's default of 10.
        range : (float, float), optional
            Lower and upper limit of the bins.  If not given, the minimum
            and maximum of *arr* are used, or ``(0, 1)`` if *arr* is empty.
        weights : array-like, optional
            Only forwarded to `numpy.histogram` when *bins* is a string.

        Returns
        -------
        The bin edges; ``bins + 1`` values when *bins* is an int or None.
        """
        # this is True for 1D sequences, and False for None, int and str
        if np.ndim(bins) == 1:
            return bins

        if isinstance(bins, str):
            # rather than backporting the internals, just do the full
            # computation.  If this is too slow for users, they can
            # update numpy, or pick a manual number of bins
            return np.histogram(arr, bins, range, weights)[1]

        if bins is None:
            # hard-code numpy's default
            bins = 10

        if range is not None:
            first_edge, last_edge = range
        else:
            arr = np.asarray(arr)
            if arr.size == 0:
                # np.min / np.max raise on empty input; numpy's own
                # implementation falls back to the unit interval here.
                first_edge, last_edge = 0, 1
            else:
                first_edge, last_edge = arr.min(), arr.max()

        # A zero-width range would make every edge identical; widen it by
        # half a unit on each side, as numpy does.
        if first_edge == last_edge:
            first_edge = first_edge - 0.5
            last_edge = last_edge + 0.5

        return np.linspace(first_edge, last_edge, bins + 1)


_log = logging.getLogger(__name__)


Expand Down Expand Up @@ -6611,9 +6635,6 @@ def hist(self, x, bins=None, range=None, density=None, weights=None,
if bin_range is not None:
bin_range = self.convert_xunits(bin_range)

# Check whether bins or range are given explicitly.
binsgiven = np.iterable(bins) or bin_range is not None

# We need to do to 'weights' what was done to 'x'
if weights is not None:
w = cbook._reshape_2D(weights, 'weights')
Expand All @@ -6638,22 +6659,42 @@ def hist(self, x, bins=None, range=None, density=None, weights=None,
"sets and %d colors were provided" % (nx, len(color)))
raise ValueError(error_message)

# If bins are not specified either explicitly or via range,
# we need to figure out the range required for all datasets,
# and supply that to np.histogram.
if not binsgiven and not input_empty:
hist_kwargs = dict()

# if the bin_range is not given, compute it while ignoring nans; numpy
# does not do this for us when guessing the range (but will
# happily ignore nans when computing the histogram).
if bin_range is None:
xmin = np.inf
xmax = -np.inf
for xi in x:
if len(xi) > 0:
if len(xi):
# python's min/max ignore nan,
# np.nanmin returns nan for all nan input
xmin = min(xmin, np.nanmin(xi))
xmax = max(xmax, np.nanmax(xi))
bin_range = (xmin, xmax)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You either need to keep this code for the case when bins is an integer, or you need to add a case for integral bins in the histogram_bin_edges backport.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good catch on that. I fell down a different rabbit hole (see my comment at the issue level)...

# make sure we have seen at least one non-nan and finite
# value before we reset the bin range
if not np.isnan([xmin, xmax]).any() and not (xmin > xmax):
bin_range = (xmin, xmax)

# If bins are not specified either explicitly or via range,
# we need to figure out the range required for all datasets,
# and supply that to np.histogram.
if not input_empty and len(x) > 1:
if weights is not None:
_w = np.concatenate(w)
else:
_w = None

bins = histogram_bin_edges(np.concatenate(x),
bins, bin_range, _w)
else:
hist_kwargs['range'] = bin_range

density = bool(density) or bool(normed)
if density and not stacked:
hist_kwargs = dict(range=bin_range, density=density)
else:
hist_kwargs = dict(range=bin_range)
hist_kwargs = dict(density=density)

# List to store all the top coordinates of the histograms
tops = []
Expand Down
20 changes: 20 additions & 0 deletions lib/matplotlib/tests/test_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6346,3 +6346,23 @@ def test_datetime_masked():
ax.plot(x, m)
# these are the default viewlim
assert ax.get_xlim() == (730120.0, 733773.0)


def test_hist_auto_bins():
    """With ``bins='auto'`` the shared edges must span every dataset."""
    datasets = [[1, 2, 3], [3, 4, 5, 6]]
    _counts, edges, _patches = plt.hist(datasets, bins='auto')
    # smallest value over all datasets is 1, largest is 6
    assert edges[0] <= 1
    assert edges[-1] >= 6


def test_hist_nan_data():
    """A trailing nan in the data must not change the counts or the edges."""
    fig, (ax_clean, ax_nan) = plt.subplots(2)

    clean = [1, 2, 3]
    with_nan = clean + [np.nan]

    counts, edges, _ = ax_clean.hist(clean)
    # silence numpy's invalid-value warnings triggered by the nan entry
    with np.errstate(invalid='ignore'):
        nan_counts, nan_edges, _ = ax_nan.hist(with_nan)

    assert np.allclose(counts, nan_counts)
    assert np.allclose(edges, nan_edges)