Skip to content

Commit 59fde25

Browse files
committed
DEP: Deprecate boolean array indices with non-matching shape
Boolean array indices will (unless the special case is taken) always be converted using a nonzero logic. However, for example ``` arr = np.arange(10) index = np.array([True]) arr[index] ``` would thus work as if index is filled up with `False`. This is a source of confusion and hardly useful in practice. Especially if the array has more then one dimension this behaviour can be very unexpected as it conflicts with broadcasting.
1 parent 7e53e9b commit 59fde25

File tree

7 files changed

+81
-20
lines changed

7 files changed

+81
-20
lines changed

numpy/core/src/multiarray/mapping.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
563563

564564
index_type |= HAS_FANCY;
565565
indices[curr_idx].type = HAS_0D_BOOL;
566+
indices[curr_idx].value = 1;
566567

567568
/* TODO: This can't fail, right? Is there a faster way? */
568569
if (PyObject_IsTrue((PyObject *)arr)) {
@@ -609,6 +610,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
609610
index_type |= HAS_FANCY;
610611
for (i=0; i < n; i++) {
611612
indices[curr_idx].type = HAS_FANCY;
613+
indices[curr_idx].value = PyArray_DIM(arr, i);
612614
indices[curr_idx].object = (PyObject *)nonzero_result[i];
613615

614616
used_ndim += 1;
@@ -651,6 +653,7 @@ prepare_index(PyArrayObject *self, PyObject *index,
651653

652654
index_type |= HAS_FANCY;
653655
indices[curr_idx].type = HAS_FANCY;
656+
indices[curr_idx].value = -1;
654657
indices[curr_idx].object = (PyObject *)arr;
655658

656659
used_ndim += 1;
@@ -738,7 +741,8 @@ prepare_index(PyArrayObject *self, PyObject *index,
738741
/*
739742
* At this point indices are all set correctly, no bounds checking
740743
* has been made and the new array may still have more dimensions
741-
* then is possible.
744+
* then is possible. And boolean indexing arrays may have had a
745+
* non-matching shape.
742746
*
743747
* Check this now so we do not have to worry about it later.
744748
* It can happen for fancy indexing or with newaxis.
@@ -753,6 +757,42 @@ prepare_index(PyArrayObject *self, PyObject *index,
753757
NPY_MAXDIMS, (new_ndim + fancy_ndim));
754758
goto failed_building_indices;
755759
}
760+
761+
/*
762+
* If we had a fancy index, we may have had a boolean array index.
763+
* So check if this had the correct shape, now that we can find out
764+
* which axes it acts on.
765+
*/
766+
used_ndim = 0;
767+
for (i=0; i < curr_idx; i++) {
768+
if ((indices[i].type == HAS_FANCY) && indices[i].value > 0) {
769+
if (indices[i].value != PyArray_DIM(self, used_ndim)) {
770+
char *err_msg[174];
771+
sprintf(err_msg,
772+
"boolean index did not match indexed array along "
773+
"dimension %d; dimension is %" NPY_INTP_FMT
774+
" but corresponding boolean dimension is %" NPY_INTP_FMT,
775+
used_ndim, PyArray_DIM(self, used_ndim),
776+
indices[i].value);
777+
778+
if (DEPRECATE(err_msg) < 0) {
779+
goto failed_building_indices;
780+
}
781+
break;
782+
}
783+
}
784+
785+
if (indices[i].type == HAS_ELLIPSIS) {
786+
used_ndim += indices[i].value;
787+
}
788+
else if ((indices[i].type == HAS_NEWAXIS) ||
789+
(indices[i].type == HAS_0D_BOOL)) {
790+
used_ndim += 0;
791+
}
792+
else {
793+
used_ndim += 1;
794+
}
795+
}
756796
}
757797

758798
*num = curr_idx;

numpy/core/src/multiarray/mapping.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@ typedef struct {
1919
* Object of index: slice, array, or NULL. Owns a reference.
2020
*/
2121
PyObject *object;
22-
/* Value of an integer index or number of slices an Ellipsis is worth */
22+
/*
23+
* Value of an integer index, number of slices an Ellipsis is worth,
24+
* -1 if input was an integer array and the original size of the
25+
* boolean array if it is a converted boolean array.
26+
*/
2327
npy_intp value;
2428
/* kind of index, see constants in mapping.c */
2529
int type;

numpy/core/tests/test_deprecations.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,5 +508,24 @@ def test_bool_error(self):
508508
self.assert_deprecated(np.not_equal, args=(a, a))
509509

510510

511+
class TestBooleanIndexShapeMismatchDeprecation(_DeprecationTestCase):
512+
"""Tests deprecation for boolean indexing where the boolean array
513+
does not match the input array along the given diemsions.
514+
"""
515+
message = r"boolean index did not match indexed array"
516+
517+
def test_simple(self):
518+
arr = np.ones((5, 4, 3))
519+
index = np.array([True])
520+
self.assert_deprecated(arr.__getitem__, args=(index,))
521+
522+
index = np.array([False] * 6)
523+
self.assert_deprecated(arr.__getitem__, args=(index,))
524+
525+
index = np.zeros((4, 4), dtype=bool)
526+
self.assert_deprecated(arr.__getitem__, args=(index,))
527+
self.assert_deprecated(arr.__getitem__, args=((slice(None), index),))
528+
529+
511530
if __name__ == "__main__":
512531
run_module_suite()

numpy/core/tests/test_indexing.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -678,14 +678,9 @@ def _get_multi_index(self, arr, indices):
678678
arr = arr.reshape((arr.shape[:ax] + (1,) + arr.shape[ax:]))
679679
continue
680680
if isinstance(indx, np.ndarray) and indx.dtype == bool:
681-
# This may be open for improvement in numpy.
682-
# numpy should probably cast boolean lists to boolean indices
683-
# instead of intp!
684-
685-
# Numpy supports for a boolean index with
686-
# non-matching shape as long as the True values are not
687-
# out of bounds. Numpy maybe should maybe not allow this,
688-
# (at least not array that are larger then the original one).
681+
if indx.shape != arr.shape[ax:ax+indx.ndim]:
682+
raise IndexError
683+
689684
try:
690685
flat_indx = np.ravel_multi_index(np.nonzero(indx),
691686
arr.shape[ax:ax+indx.ndim], mode='raise')

numpy/core/tests/test_multiarray.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2139,29 +2139,29 @@ def test_tuple(self):
21392139

21402140
def test_mask(self):
21412141
x = array([1, 2, 3, 4])
2142-
m = array([0, 1], bool)
2142+
m = array([0, 1, 0, 0], bool)
21432143
assert_array_equal(x[m], array([2]))
21442144

21452145
def test_mask2(self):
21462146
x = array([[1, 2, 3, 4], [5, 6, 7, 8]])
21472147
m = array([0, 1], bool)
2148-
m2 = array([[0, 1], [1, 0]], bool)
2149-
m3 = array([[0, 1]], bool)
2148+
m2 = array([[0, 1, 0, 0], [1, 0, 0, 0]], bool)
2149+
m3 = array([[0, 1, 0, 0], [0, 0, 0, 0]], bool)
21502150
assert_array_equal(x[m], array([[5, 6, 7, 8]]))
21512151
assert_array_equal(x[m2], array([2, 5]))
21522152
assert_array_equal(x[m3], array([2]))
21532153

21542154
def test_assign_mask(self):
21552155
x = array([1, 2, 3, 4])
2156-
m = array([0, 1], bool)
2156+
m = array([0, 1, 0, 0], bool)
21572157
x[m] = 5
21582158
assert_array_equal(x, array([1, 5, 3, 4]))
21592159

21602160
def test_assign_mask2(self):
21612161
xorig = array([[1, 2, 3, 4], [5, 6, 7, 8]])
21622162
m = array([0, 1], bool)
2163-
m2 = array([[0, 1], [1, 0]], bool)
2164-
m3 = array([[0, 1]], bool)
2163+
m2 = array([[0, 1, 0, 0], [1, 0, 0, 0]], bool)
2164+
m3 = array([[0, 1, 0, 0], [0, 0, 0, 0]], bool)
21652165
x = xorig.copy()
21662166
x[m] = 10
21672167
assert_array_equal(x, array([[1, 2, 3, 4], [10, 10, 10, 10]]))

numpy/core/tests/test_regression.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -787,18 +787,21 @@ def test_arange_non_native_dtype(self, level=rlevel):
787787
assert_equal(np.arange(0.5, dtype=dt).dtype, dt)
788788
assert_equal(np.arange(5, dtype=dt).dtype, dt)
789789

790-
def test_bool_indexing_invalid_nr_elements(self, level=rlevel):
790+
def test_bool_flat_indexing_invalid_nr_elements(self, level=rlevel):
791791
s = np.ones(10, dtype=float)
792792
x = np.array((15,), dtype=float)
793793
def ia(x, s, v): x[(s>0)]=v
794794
# After removing deprecation, the following are ValueErrors.
795795
# This might seem odd as compared to the value error below. This
796796
# is due to the fact that the new code always uses "nonzero" logic
797797
# and the boolean special case is not taken.
798-
self.assertRaises(IndexError, ia, x, s, np.zeros(9, dtype=float))
799-
self.assertRaises(IndexError, ia, x, s, np.zeros(11, dtype=float))
798+
with warnings.catch_warnings():
799+
warnings.simplefilter('ignore', DeprecationWarning)
800+
self.assertRaises(IndexError, ia, x, s, np.zeros(9, dtype=float))
801+
self.assertRaises(IndexError, ia, x, s, np.zeros(11, dtype=float))
800802
# Old special case (different code path):
801803
self.assertRaises(ValueError, ia, x.flat, s, np.zeros(9, dtype=float))
804+
self.assertRaises(ValueError, ia, x.flat, s, np.zeros(11, dtype=float))
802805

803806
def test_mem_scalar_indexing(self, level=rlevel):
804807
"""Ticket #603"""

0 commit comments

Comments
 (0)