Skip to content

Commit 159507a

Browse files
WarrenWeckesser authored and charris committed
BUG: lib: Fix histogram problem with signed integer arrays.
An input such as `np.histogram(np.array([-2, 0, 127], dtype=np.int8), bins="auto")` would raise the exception `ValueError: Number of samples, -1, must be non-negative.` The problem was that the peak-to-peak value for the input array was computed with the `ptp` method, which returns a negative value for a signed integer array when the true peak-to-peak value exceeds the maximum value representable by the array's data type (for example, with int8 data, 127 - (-2) = 129 wraps around to -127). The fix is to use a peak-to-peak function that returns an unsigned value for signed integer arrays. Closes gh-14379.
1 parent ef0656f commit 159507a

File tree

2 files changed

+26
-5
lines changed

2 files changed

+26
-5
lines changed

numpy/lib/histograms.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,16 @@
2222
_range = range
2323

2424

25+
def _ptp(x):
26+
"""Peak-to-peak value of x.
27+
28+
This implementation avoids the problem of signed integer arrays having a
29+
peak-to-peak value that cannot be represented with the array's data type.
30+
This function returns an unsigned value for signed integer arrays.
31+
"""
32+
return _unsigned_subtract(x.max(), x.min())
33+
34+
2535
def _hist_bin_sqrt(x, range):
2636
"""
2737
Square root histogram bin estimator.
@@ -40,7 +50,7 @@ def _hist_bin_sqrt(x, range):
4050
h : An estimate of the optimal bin width for the given data.
4151
"""
4252
del range # unused
43-
return x.ptp() / np.sqrt(x.size)
53+
return _ptp(x) / np.sqrt(x.size)
4454

4555

4656
def _hist_bin_sturges(x, range):
@@ -63,7 +73,7 @@ def _hist_bin_sturges(x, range):
6373
h : An estimate of the optimal bin width for the given data.
6474
"""
6575
del range # unused
66-
return x.ptp() / (np.log2(x.size) + 1.0)
76+
return _ptp(x) / (np.log2(x.size) + 1.0)
6777

6878

6979
def _hist_bin_rice(x, range):
@@ -87,7 +97,7 @@ def _hist_bin_rice(x, range):
8797
h : An estimate of the optimal bin width for the given data.
8898
"""
8999
del range # unused
90-
return x.ptp() / (2.0 * x.size ** (1.0 / 3))
100+
return _ptp(x) / (2.0 * x.size ** (1.0 / 3))
91101

92102

93103
def _hist_bin_scott(x, range):
@@ -137,7 +147,7 @@ def _hist_bin_stone(x, range):
137147
"""
138148

139149
n = x.size
140-
ptp_x = np.ptp(x)
150+
ptp_x = _ptp(x)
141151
if n <= 1 or ptp_x == 0:
142152
return 0
143153

@@ -184,7 +194,7 @@ def _hist_bin_doane(x, range):
184194
np.true_divide(temp, sigma, temp)
185195
np.power(temp, 3, temp)
186196
g1 = np.mean(temp)
187-
return x.ptp() / (1.0 + np.log2(x.size) +
197+
return _ptp(x) / (1.0 + np.log2(x.size) +
188198
np.log2(1.0 + np.absolute(g1) / sg1))
189199
return 0.0
190200

numpy/lib/tests/test_histograms.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
assert_array_almost_equal, assert_raises, assert_allclose,
99
assert_array_max_ulp, assert_raises_regex, suppress_warnings,
1010
)
11+
import pytest
1112

1213

1314
class TestHistogram(object):
@@ -591,6 +592,16 @@ def test_simple_range(self):
591592
msg += " with datasize of {0}".format(testlen)
592593
assert_equal(len(a), numbins, err_msg=msg)
593594

595+
@pytest.mark.parametrize("bins", ['auto', 'fd', 'doane', 'scott',
596+
'stone', 'rice', 'sturges'])
597+
def test_signed_integer_data(self, bins):
598+
# Regression test for gh-14379.
599+
a = np.array([-2, 0, 127], dtype=np.int8)
600+
hist, edges = np.histogram(a, bins=bins)
601+
hist32, edges32 = np.histogram(a.astype(np.int32), bins=bins)
602+
assert_array_equal(hist, hist32)
603+
assert_array_equal(edges, edges32)
604+
594605
def test_simple_weighted(self):
595606
"""
596607
Check that weighted data raises a TypeError

0 commit comments

Comments
 (0)