From 0c8e9d852a28a7b9bcf55c80d92d13b20ed9468a Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Wed, 17 May 2017 21:57:29 -0400 Subject: [PATCH 1/4] FIX: if bins input to hist is str, treat like no bins This change causes the range of all data sets to be computed and passed to numpy (which in turn uses the total range to compute the 'best' bins). The existing code 'latches' the bins from the first data set to use for the rest so this can still lead to poor binning (if the data sets are widely different). closes #8636 --- lib/matplotlib/axes/_axes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 4630a7568818..ce88d9761db9 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -6612,7 +6612,9 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, bin_range = self.convert_xunits(bin_range) # Check whether bins or range are given explicitly. - binsgiven = np.iterable(bins) or bin_range is not None + binsgiven = ((np.iterable(bins) and + not isinstance(bins, str)) or + bin_range is not None) # We need to do to 'weights' what was done to 'x' if weights is not None: From 5892d62a5f745fffe9c71335baeecb76ff5abf9c Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sun, 24 Feb 2019 15:53:09 -0500 Subject: [PATCH 2/4] TST: add test of correctly getting the limits for multi-histogram --- lib/matplotlib/tests/test_axes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 741a9c2b574a..0e405407e4f0 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -6346,3 +6346,9 @@ def test_datetime_masked(): ax.plot(x, m) # these are the default viewlim assert ax.get_xlim() == (730120.0, 733773.0) + + +def test_hist_auto_bins(): + _, bins, _ = plt.hist([[1, 2, 3], [3, 4, 5, 6]], bins='auto') + assert bins[0] <= 1 + assert bins[-1] >= 6 
From 239be7b18e311c57a1393b6eeefc62b7cc629339 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sun, 24 Feb 2019 21:03:29 -0500 Subject: [PATCH 3/4] MNT: copy logic from numpy as suggested by @eric-wieser We are no longer tracking whether the bins kwarg was passed, only whether it was passed in as an array that we should use as the bin edges. Simplify some internal logic. --- lib/matplotlib/axes/_axes.py | 49 +++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index ce88d9761db9..75993581097d 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -36,6 +36,25 @@ from matplotlib.axes._base import _AxesBase, _process_plot_format from matplotlib.axes._secondary_axes import SecondaryAxis +try: + from numpy.lib.histograms import histogram_bin_edges +except ImportError: + def histogram_bin_edges(arr, bins, range=None, weights=None): + if isinstance(bins, str): + # rather than backporting the internals, just do the full + # computation. If this is too slow for users, they can + # update numpy, or pick a manual number of bins + return np.histogram(arr, bins, range, weights)[1] + else: + if bins is None: + # hard-code numpy's default + bins = 10 + if range is None: + range = np.min(arr), np.max(arr) + + return np.linspace(*range, bins + 1) + + _log = logging.getLogger(__name__) @@ -6611,10 +6630,8 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, if bin_range is not None: bin_range = self.convert_xunits(bin_range) - # Check whether bins or range are given explicitly. 
- binsgiven = ((np.iterable(bins) and - not isinstance(bins, str)) or - bin_range is not None) + # this in True for 1D arrays, and False for None and str + bins_array_given = np.ndim(bins) == 1 # We need to do to 'weights' what was done to 'x' if weights is not None: @@ -6640,22 +6657,24 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, "sets and %d colors were provided" % (nx, len(color))) raise ValueError(error_message) + hist_kwargs = dict() + # If bins are not specified either explicitly or via range, # we need to figure out the range required for all datasets, # and supply that to np.histogram. - if not binsgiven and not input_empty: - xmin = np.inf - xmax = -np.inf - for xi in x: - if len(xi) > 0: - xmin = min(xmin, np.nanmin(xi)) - xmax = max(xmax, np.nanmax(xi)) - bin_range = (xmin, xmax) + if not bins_array_given and not input_empty and len(x) > 1: + if weights is not None: + _w = np.concatenate(w) + else: + _w = None + bins = histogram_bin_edges(np.concatenate(x), + bins, bin_range, _w) + else: + hist_kwargs['range'] = bin_range + density = bool(density) or bool(normed) if density and not stacked: - hist_kwargs = dict(range=bin_range, density=density) - else: - hist_kwargs = dict(range=bin_range) + hist_kwargs = dict(density=density) # List to store all the top coordinates of the histograms tops = [] From c6f05548c7e4b7bf8ce90b4eb71ad884e15563c8 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sun, 31 Mar 2019 00:42:51 -0400 Subject: [PATCH 4/4] FIX: restore (and test) handling of nan in hist data --- lib/matplotlib/axes/_axes.py | 28 ++++++++++++++++++++++++---- lib/matplotlib/tests/test_axes.py | 14 ++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 75993581097d..b63c810d210e 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -39,7 +39,12 @@ try: from numpy.lib.histograms import histogram_bin_edges except 
ImportError: + # this function is new in np 1.15 def histogram_bin_edges(arr, bins, range=None, weights=None): + # this in True for 1D arrays, and False for None and str + if np.ndim(bins) == 1: + return bins + if isinstance(bins, str): # rather than backporting the internals, just do the full # computation. If this is too slow for users, they can @@ -6630,9 +6635,6 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, if bin_range is not None: bin_range = self.convert_xunits(bin_range) - # this in True for 1D arrays, and False for None and str - bins_array_given = np.ndim(bins) == 1 - # We need to do to 'weights' what was done to 'x' if weights is not None: w = cbook._reshape_2D(weights, 'weights') @@ -6659,14 +6661,32 @@ def hist(self, x, bins=None, range=None, density=None, weights=None, hist_kwargs = dict() + # if the bin_range is not given, compute without nan numpy + # does not do this for us when guessing the range (but will + # happily ignore nans when computing the histogram). + if bin_range is None: + xmin = np.inf + xmax = -np.inf + for xi in x: + if len(xi): + # python's min/max ignore nan, + # np.minnan returns nan for all nan input + xmin = min(xmin, np.nanmin(xi)) + xmax = max(xmax, np.nanmax(xi)) + # make sure we have seen at least one non-nan and finite + # value before we reset the bin range + if not np.isnan([xmin, xmax]).any() and not (xmin > xmax): + bin_range = (xmin, xmax) + # If bins are not specified either explicitly or via range, # we need to figure out the range required for all datasets, # and supply that to np.histogram. 
- if not bins_array_given and not input_empty and len(x) > 1: + if not input_empty and len(x) > 1: if weights is not None: _w = np.concatenate(w) else: _w = None + bins = histogram_bin_edges(np.concatenate(x), bins, bin_range, _w) else: diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index 0e405407e4f0..3e5d00b16f06 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -6352,3 +6352,17 @@ def test_hist_auto_bins(): _, bins, _ = plt.hist([[1, 2, 3], [3, 4, 5, 6]], bins='auto') assert bins[0] <= 1 assert bins[-1] >= 6 + + +def test_hist_nan_data(): + fig, (ax1, ax2) = plt.subplots(2) + + data = [1, 2, 3] + nan_data = data + [np.nan] + + bins, edges, _ = ax1.hist(data) + with np.errstate(invalid='ignore'): + nanbins, nanedges, _ = ax2.hist(nan_data) + + assert np.allclose(bins, nanbins) + assert np.allclose(edges, nanedges)