
Commit cb15a82

mtsokol and lesteve authored
MAINT: Remove np.in1d and np.trapz usages (#27140)
Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
1 parent 95778fb commit cb15a82
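
Both substitutions are mechanical renames: np.isin is NumPy's recommended replacement for np.in1d, and scipy.integrate.trapezoid (re-exported through sklearn.utils.fixes below) takes over the role of np.trapz. A minimal sketch of the equivalence, with illustrative arrays that are not taken from the diff:

    import numpy as np
    from scipy.integrate import trapezoid  # replacement for np.trapz

    a = np.array([3, 1, 4, 1, 5])
    allowed = np.array([1, 5])

    # np.isin is the preferred spelling of np.in1d: element-wise membership test
    mask = np.isin(a, allowed)      # array([False,  True, False,  True,  True])

    x = np.array([0.0, 0.5, 1.0])
    y = np.array([0.0, 0.5, 1.0])

    # trapezoidal-rule integration, same call shape as np.trapz(y, x)
    area = trapezoid(y, x)          # 0.5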

15 files changed: +30 −20 lines


examples/semi_supervised/plot_label_propagation_digits_active_learning.py

+1 −1

@@ -79,7 +79,7 @@
     # select up to 5 digit examples that the classifier is most uncertain about
     uncertainty_index = np.argsort(pred_entropies)[::-1]
     uncertainty_index = uncertainty_index[
-        np.in1d(uncertainty_index, unlabeled_indices)
+        np.isin(uncertainty_index, unlabeled_indices)
     ][:5]

     # keep track of indices that we get labels for
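
As a side note on the pattern above: the boolean mask from np.isin preserves the descending-entropy ordering while dropping indices that are already labeled. A tiny sketch with made-up values:

    import numpy as np

    # hypothetical values, only to illustrate the filtering pattern above
    pred_entropies = np.array([0.1, 0.9, 0.4, 0.7, 0.2])
    unlabeled_indices = np.array([1, 2, 4])

    uncertainty_index = np.argsort(pred_entropies)[::-1]     # [1, 3, 2, 4, 0]
    mask = np.isin(uncertainty_index, unlabeled_indices)     # [True, False, True, True, False]
    selected = uncertainty_index[mask][:5]                   # [1, 2, 4], most uncertain unlabeled samples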

sklearn/datasets/_twenty_newsgroups.py

+1 −1

@@ -319,7 +319,7 @@ def fetch_20newsgroups(
         # Sort the categories to have the ordering of the labels
         labels.sort()
         labels, categories = zip(*labels)
-        mask = np.in1d(data.target, labels)
+        mask = np.isin(data.target, labels)
         data.filenames = data.filenames[mask]
         data.target = data.target[mask]
         # searchsorted to have continuous labels

sklearn/feature_extraction/image.py

+1 −1

@@ -76,7 +76,7 @@ def _mask_edges_weights(mask, edges, weights=None):
     """Apply a mask to edges (weighted or not)"""
     inds = np.arange(mask.size)
     inds = inds[mask.ravel()]
-    ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds))
+    ind_mask = np.logical_and(np.isin(edges[0], inds), np.isin(edges[1], inds))
     edges = edges[:, ind_mask]
     if weights is not None:
         weights = weights[ind_mask]

sklearn/metrics/_plot/tests/test_precision_recall_display.py

+3 −2

@@ -16,6 +16,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid

 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
@@ -289,7 +290,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_method):
     # we should obtain the statistics of the "cancer" class
     avg_prec_limit = 0.65
     assert display.average_precision < avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) < avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) < avg_prec_limit

     # otherwise we should obtain the statistics of the "not cancer" class
     if constructor_name == "from_estimator":
@@ -308,7 +309,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_method):
     )
     avg_prec_limit = 0.95
     assert display.average_precision > avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) > avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) > avg_prec_limit


 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])

sklearn/metrics/_plot/tests/test_roc_curve_display.py

+3 −2

@@ -11,6 +11,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid


 @pytest.fixture(scope="module")
@@ -293,7 +294,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
     roc_auc_limit = 0.95679

     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

     if constructor_name == "from_estimator":
         display = RocCurveDisplay.from_estimator(
@@ -311,4 +312,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
         )

     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

sklearn/metrics/_ranking.py

+3 −2

@@ -38,6 +38,7 @@
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.extmath import stable_cumsum
+from ..utils.fixes import trapezoid
 from ..utils.multiclass import type_of_target
 from ..utils.sparsefuncs import count_nonzero
 from ..utils.validation import _check_pos_label_consistency, _check_sample_weight
@@ -104,9 +105,9 @@ def auc(x, y):
         else:
             raise ValueError("x is neither increasing nor decreasing : {}.".format(x))

-    area = direction * np.trapz(y, x)
+    area = direction * trapezoid(y, x)
     if isinstance(area, np.memmap):
-        # Reductions such as .sum used internally in np.trapz do not return a
+        # Reductions such as .sum used internally in trapezoid do not return a
         # scalar by default for numpy.memmap instances contrary to
         # regular numpy.ndarray instances.
         area = area.dtype.type(area)
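
The memmap guard is untouched by the rename. A hedged sketch of what it protects against, assuming memory-mapped inputs (the file path is a placeholder, not from the diff):

    import numpy as np
    from scipy.integrate import trapezoid

    # assumption: x and y live in a memory-mapped file, as they can when large
    # inputs are memory-mapped by joblib; "xy.dat" is a placeholder path
    xy = np.memmap("xy.dat", dtype=np.float64, mode="w+", shape=(2, 3))
    xy[0] = [0.0, 0.5, 1.0]   # x
    xy[1] = [0.0, 0.5, 1.0]   # y

    area = trapezoid(xy[1], xy[0])
    # the reduction may come back as a 0-d memmap rather than a plain scalar,
    # so auc coerces it with the array's own scalar type, as in the hunk above
    if isinstance(area, np.memmap):
        area = area.dtype.type(area)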

sklearn/model_selection/_split.py

+2 −2

@@ -1973,8 +1973,8 @@ def _iter_indices(self, X, y, groups):
             # these are the indices of classes in the partition
             # invert them into data indices

-            train = np.flatnonzero(np.in1d(group_indices, group_train))
-            test = np.flatnonzero(np.in1d(group_indices, group_test))
+            train = np.flatnonzero(np.isin(group_indices, group_train))
+            test = np.flatnonzero(np.isin(group_indices, group_test))

             yield train, test
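
The same membership test inverts a partition over group labels into per-sample indices; an illustrative sketch with made-up group ids:

    import numpy as np

    # hypothetical per-sample group ids and a train/test partition of the groups
    group_indices = np.array([0, 0, 1, 2, 1, 2])
    group_train, group_test = np.array([0, 2]), np.array([1])

    train = np.flatnonzero(np.isin(group_indices, group_train))  # [0, 1, 3, 5]
    test = np.flatnonzero(np.isin(group_indices, group_test))    # [2, 4]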

sklearn/model_selection/tests/test_search.py

+1 −1

@@ -1418,7 +1418,7 @@ def test_grid_search_correct_score_results():
     expected_keys = ("mean_test_score", "rank_test_score") + tuple(
         "split%d_test_score" % cv_i for cv_i in range(n_splits)
     )
-    assert all(np.in1d(expected_keys, result_keys))
+    assert all(np.isin(expected_keys, result_keys))

     cv = StratifiedKFold(n_splits=n_splits)
     n_splits = grid_search.n_splits_

sklearn/model_selection/tests/test_split.py

+2 −2

@@ -987,8 +987,8 @@ def test_group_shuffle_split():
         # First test: no train group is in the test set and vice versa
         l_train_unique = np.unique(l[train])
         l_test_unique = np.unique(l[test])
-        assert not np.any(np.in1d(l[train], l_test_unique))
-        assert not np.any(np.in1d(l[test], l_train_unique))
+        assert not np.any(np.isin(l[train], l_test_unique))
+        assert not np.any(np.isin(l[test], l_train_unique))

         # Second test: train and test add up to all the data
         assert l[train].size + l[test].size == l.size

sklearn/naive_bayes.py

+1 −1

@@ -467,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
             classes = self.classes_

         unique_y = np.unique(y)
-        unique_y_in_classes = np.in1d(unique_y, classes)
+        unique_y_in_classes = np.isin(unique_y, classes)

         if not np.all(unique_y_in_classes):
             raise ValueError(

sklearn/preprocessing/_label.py

+1 −1

@@ -553,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         y = column_or_1d(y)

         # pick out the known labels from y
-        y_in_classes = np.in1d(y, classes)
+        y_in_classes = np.isin(y, classes)
         y_seen = y[y_in_classes]
         indices = np.searchsorted(sorted_class, y_seen)
         indptr = np.hstack((0, np.cumsum(y_in_classes)))

sklearn/tests/test_isotonic.py

+1 −1

@@ -595,7 +595,7 @@ def test_isotonic_thresholds(increasing):
     # the data is already strictly monotonic which is not the case with
     # this random data)
     assert X_thresholds.shape[0] < X.shape[0]
-    assert np.in1d(X_thresholds, X).all()
+    assert np.isin(X_thresholds, X).all()

     # Output thresholds lie in the range of the training set:
     assert y_thresholds.max() <= y.max()

sklearn/utils/_encode.py

+1 −1

@@ -296,7 +296,7 @@ def is_valid(value):
         diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
         if return_mask:
             if diff.size:
-                valid_mask = np.in1d(values, known_values)
+                valid_mask = np.isin(values, known_values)
             else:
                 valid_mask = np.ones(len(values), dtype=bool)

sklearn/utils/class_weight.py

+2 −2

@@ -57,7 +57,7 @@ def compute_class_weight(class_weight, *, classes, y):
         # Find the weight of each class as present in y.
         le = LabelEncoder()
         y_ind = le.fit_transform(y)
-        if not all(np.in1d(classes, le.classes_)):
+        if not all(np.isin(classes, le.classes_)):
             raise ValueError("classes should have valid labels that are in y")

         recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))
@@ -195,7 +195,7 @@ def compute_sample_weight(class_weight, y, *, indices=None):

         if classes_missing:
             # Make missing classes' weight zero
-            weight_k[np.in1d(y_full, list(classes_missing))] = 0.0
+            weight_k[np.isin(y_full, list(classes_missing))] = 0.0

         expanded_class_weight.append(weight_k)

sklearn/utils/fixes.py

+7 −0

@@ -200,3 +200,10 @@ def _contents(data_module):
     from numpy.exceptions import ComplexWarning, VisibleDeprecationWarning
 else:
     from numpy import ComplexWarning, VisibleDeprecationWarning  # type: ignore  # noqa
+
+
+# TODO: Remove when Scipy 1.6 is the minimum supported version
+try:
+    from scipy.integrate import trapezoid  # type: ignore  # noqa
+except ImportError:
+    from scipy.integrate import trapz as trapezoid  # type: ignore  # noqa
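
Callers import the shim instead of reaching into scipy directly, as the metrics and test modules above now do; a short usage sketch with illustrative points:

    import numpy as np
    from sklearn.utils.fixes import trapezoid  # scipy.integrate.trapezoid, or trapz on older SciPy

    fpr = np.array([0.0, 0.25, 1.0])
    tpr = np.array([0.0, 0.75, 1.0])

    # same call shape as the np.trapz usages removed above
    auc_value = trapezoid(tpr, fpr)   # 0.75 for these illustrative points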
