
Commit ade66be

MAINT: Remove np.in1d and np.trapz usages
1 parent bb58543 commit ade66be

14 files changed (+25 -20 lines)

examples/semi_supervised/plot_label_propagation_digits_active_learning.py

+1 -1

@@ -79,7 +79,7 @@
 # select up to 5 digit examples that the classifier is most uncertain about
 uncertainty_index = np.argsort(pred_entropies)[::-1]
 uncertainty_index = uncertainty_index[
-    np.in1d(uncertainty_index, unlabeled_indices)
+    np.isin(uncertainty_index, unlabeled_indices).ravel()
 ][:5]

 # keep track of indices that we get labels for
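Why the replacement also adds .ravel(): np.in1d always flattens its first argument into a 1-D mask, while np.isin preserves that argument's shape, so the explicit ravel keeps the old 1-D behaviour. A minimal sketch with made-up index arrays (not data from the example):

import numpy as np

uncertainty_index = np.array([7, 3, 9, 1])   # hypothetical indices
unlabeled_indices = np.array([1, 3, 5, 7])

# Identical to np.in1d for 1-D input; .ravel() only matters for N-D queries.
mask = np.isin(uncertainty_index, unlabeled_indices).ravel()
# mask -> array([ True,  True, False,  True])

grid = uncertainty_index.reshape(2, 2)
np.isin(grid, unlabeled_indices).shape       # (2, 2): shape of the query preserved
# np.in1d(grid, unlabeled_indices) returns a flat (4,) mask and, on recent
# NumPy, emits a DeprecationWarning.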

sklearn/datasets/_twenty_newsgroups.py

+1 -1

@@ -319,7 +319,7 @@ def fetch_20newsgroups(
 # Sort the categories to have the ordering of the labels
 labels.sort()
 labels, categories = zip(*labels)
-mask = np.in1d(data.target, labels)
+mask = np.isin(data.target, labels)
 data.filenames = data.filenames[mask]
 data.target = data.target[mask]
 # searchsorted to have continuous labels

sklearn/feature_extraction/image.py

+3 -1

@@ -76,7 +76,9 @@ def _mask_edges_weights(mask, edges, weights=None):
 """Apply a mask to edges (weighted or not)"""
 inds = np.arange(mask.size)
 inds = inds[mask.ravel()]
-ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds))
+ind_mask = np.logical_and(
+    np.isin(edges[0], inds).ravel(), np.isin(edges[1], inds).ravel()
+)
 edges = edges[:, ind_mask]
 if weights is not None:
     weights = weights[ind_mask]

sklearn/metrics/_plot/tests/test_precision_recall_display.py

+3 -2

@@ -2,6 +2,7 @@

 import numpy as np
 import pytest
+from scipy.integrate import trapezoid

 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, make_classification

@@ -286,7 +287,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
 # we should obtain the statistics of the "cancer" class
 avg_prec_limit = 0.65
 assert display.average_precision < avg_prec_limit
-assert -np.trapz(display.precision, display.recall) < avg_prec_limit
+assert -trapezoid(display.precision, display.recall) < avg_prec_limit

 # otherwise we should obtain the statistics of the "not cancer" class
 if constructor_name == "from_estimator":

@@ -305,7 +306,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
 )
 avg_prec_limit = 0.95
 assert display.average_precision > avg_prec_limit
-assert -np.trapz(display.precision, display.recall) > avg_prec_limit
+assert -trapezoid(display.precision, display.recall) > avg_prec_limit


 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
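About the leading minus sign in these assertions, a hedged sketch with toy numbers (not values from the test): scipy.integrate.trapezoid is the drop-in replacement for the deprecated np.trapz, and because precision_recall_curve returns recall in decreasing order, the raw trapezoidal integral is negative and gets negated to obtain the positive area under the PR curve.

import numpy as np
from scipy.integrate import trapezoid

recall = np.array([1.0, 0.75, 0.5, 0.0])     # decreasing, as precision_recall_curve returns it
precision = np.array([0.5, 0.6, 0.8, 1.0])

trapezoid(precision, recall)                 # ~ -0.76, negative because x decreases
-trapezoid(precision, recall)                # ~  0.76, area under the PR curve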

sklearn/metrics/_plot/tests/test_roc_curve_display.py

+3 -2

@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from scipy.integrate import trapezoid

 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, load_iris

@@ -290,7 +291,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
 roc_auc_limit = 0.95679

 assert display.roc_auc == pytest.approx(roc_auc_limit)
-assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

 if constructor_name == "from_estimator":
     display = RocCurveDisplay.from_estimator(

@@ -308,4 +309,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
 )

 assert display.roc_auc == pytest.approx(roc_auc_limit)
-assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

sklearn/metrics/_ranking.py

+3 -2

@@ -24,6 +24,7 @@
 from numbers import Integral, Real

 import numpy as np
+from scipy.integrate import trapezoid
 from scipy.sparse import csr_matrix, issparse
 from scipy.stats import rankdata

@@ -104,9 +105,9 @@ def auc(x, y):
 else:
     raise ValueError("x is neither increasing nor decreasing : {}.".format(x))

-area = direction * np.trapz(y, x)
+area = direction * trapezoid(y, x)
 if isinstance(area, np.memmap):
-    # Reductions such as .sum used internally in np.trapz do not return a
+    # Reductions such as .sum used internally in trapezoid do not return a
     # scalar by default for numpy.memmap instances contrary to
     # regular numpy.ndarray instances.
     area = area.dtype.type(area)
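The memmap branch above is the subtle part of this hunk. A hedged, self-contained illustration of the corner case the comment describes (the temporary file and values are assumptions, not scikit-learn code): on some NumPy versions, reductions over a numpy.memmap return a 0-d memmap rather than a plain scalar, and area.dtype.type(area) normalises either form to a NumPy scalar.

import os
import tempfile

import numpy as np

path = os.path.join(tempfile.mkdtemp(), "fpr.dat")
fpr = np.memmap(path, dtype=np.float64, mode="w+", shape=(4,))
fpr[:] = [0.0, 0.25, 0.5, 1.0]

s = fpr.sum()            # the kind of reduction trapezoid performs internally
print(type(s))           # numpy.memmap (0-d) on some NumPy versions, numpy.float64 on others
s = s.dtype.type(s)      # a plain numpy.float64 scalar in either case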

sklearn/model_selection/_split.py

+2 -2

@@ -1973,8 +1973,8 @@ def _iter_indices(self, X, y, groups):
 # these are the indices of classes in the partition
 # invert them into data indices

-train = np.flatnonzero(np.in1d(group_indices, group_train))
-test = np.flatnonzero(np.in1d(group_indices, group_test))
+train = np.flatnonzero(np.isin(group_indices, group_train))
+test = np.flatnonzero(np.isin(group_indices, group_test))

 yield train, test
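As a hedged sketch with toy data (names and values are illustrative, not the splitter's internals), the np.isin plus np.flatnonzero pair is what turns a partition of group ids back into sample indices:

import numpy as np

group_indices = np.array([0, 0, 1, 2, 2, 1])              # group id of each sample
group_train, group_test = np.array([0, 2]), np.array([1])

train = np.flatnonzero(np.isin(group_indices, group_train))
test = np.flatnonzero(np.isin(group_indices, group_test))
# train -> array([0, 1, 3, 4]), test -> array([2, 5]):
# every sample whose group fell into the train (resp. test) partition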

sklearn/model_selection/tests/test_search.py

+1 -1

@@ -1418,7 +1418,7 @@ def test_grid_search_correct_score_results():
 expected_keys = ("mean_test_score", "rank_test_score") + tuple(
     "split%d_test_score" % cv_i for cv_i in range(n_splits)
 )
-assert all(np.in1d(expected_keys, result_keys))
+assert all(np.isin(expected_keys, result_keys))

 cv = StratifiedKFold(n_splits=n_splits)
 n_splits = grid_search.n_splits_

sklearn/model_selection/tests/test_split.py

+2 -2

@@ -987,8 +987,8 @@ def test_group_shuffle_split():
 # First test: no train group is in the test set and vice versa
 l_train_unique = np.unique(l[train])
 l_test_unique = np.unique(l[test])
-assert not np.any(np.in1d(l[train], l_test_unique))
-assert not np.any(np.in1d(l[test], l_train_unique))
+assert not np.any(np.isin(l[train], l_test_unique))
+assert not np.any(np.isin(l[test], l_train_unique))

 # Second test: train and test add up to all the data
 assert l[train].size + l[test].size == l.size

sklearn/naive_bayes.py

+1 -1

@@ -467,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
 classes = self.classes_

 unique_y = np.unique(y)
-unique_y_in_classes = np.in1d(unique_y, classes)
+unique_y_in_classes = np.isin(unique_y, classes)

 if not np.all(unique_y_in_classes):
     raise ValueError(

sklearn/preprocessing/_label.py

+1 -1

@@ -553,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
 y = column_or_1d(y)

 # pick out the known labels from y
-y_in_classes = np.in1d(y, classes)
+y_in_classes = np.isin(y, classes)
 y_seen = y[y_in_classes]
 indices = np.searchsorted(sorted_class, y_seen)
 indptr = np.hstack((0, np.cumsum(y_in_classes)))
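The surrounding lines assemble a sparse indicator matrix. A hedged toy reconstruction (data made up, not scikit-learn's code) of how the np.isin mask, the searchsorted column indices and the cumulative sum fit together as CSR structure:

import numpy as np
import scipy.sparse as sp

y = np.array(["cat", "dog", "fish", "dog"])
sorted_class = np.array(["cat", "dog"])             # "fish" is an unknown label

y_in_classes = np.isin(y, sorted_class)             # [ True,  True, False,  True]
y_seen = y[y_in_classes]
indices = np.searchsorted(sorted_class, y_seen)     # column index of each known label
indptr = np.hstack((0, np.cumsum(y_in_classes)))    # rows with unknown labels stay empty
data = np.ones_like(indices)

Y = sp.csr_matrix((data, indices, indptr), shape=(len(y), len(sorted_class)))
# Y.toarray() -> [[1, 0], [0, 1], [0, 0], [0, 1]]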

sklearn/tests/test_isotonic.py

+1 -1

@@ -595,7 +595,7 @@ def test_isotonic_thresholds(increasing):
 # the data is already strictly monotonic which is not the case with
 # this random data)
 assert X_thresholds.shape[0] < X.shape[0]
-assert np.in1d(X_thresholds, X).all()
+assert np.isin(X_thresholds, X).all()

 # Output thresholds lie in the range of the training set:
 assert y_thresholds.max() <= y.max()

sklearn/utils/_encode.py

+1 -1

@@ -296,7 +296,7 @@ def is_valid(value):
 diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
 if return_mask:
     if diff.size:
-        valid_mask = np.in1d(values, known_values)
+        valid_mask = np.isin(values, known_values).ravel()
     else:
         valid_mask = np.ones(len(values), dtype=bool)

sklearn/utils/class_weight.py

+2 -2

@@ -57,7 +57,7 @@ def compute_class_weight(class_weight, *, classes, y):
 # Find the weight of each class as present in y.
 le = LabelEncoder()
 y_ind = le.fit_transform(y)
-if not all(np.in1d(classes, le.classes_)):
+if not all(np.isin(classes, le.classes_)):
     raise ValueError("classes should have valid labels that are in y")

 recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))

@@ -194,7 +194,7 @@ def compute_sample_weight(class_weight, y, *, indices=None):

 if classes_missing:
     # Make missing classes' weight zero
-    weight_k[np.in1d(y_full, list(classes_missing))] = 0.0
+    weight_k[np.isin(y_full, list(classes_missing))] = 0.0

 expanded_class_weight.append(weight_k)
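For readers less familiar with compute_class_weight and compute_sample_weight, a hedged toy walk-through (values are illustrative only) of the two pieces visible above: the "balanced" reciprocal-frequency weights and the boolean-mask assignment that zeroes out classes missing from a subsample.

import numpy as np

y = np.array([0, 0, 0, 1, 1, 2])              # class counts: 3, 2, 1
counts = np.bincount(y).astype(np.float64)
weights = len(y) / (len(counts) * counts)     # n_samples / (n_classes * count)
# weights -> array([0.667, 1.0, 2.0]) for classes 0, 1, 2

sample_weight = weights[y]
classes_missing = {2}                         # hypothetical: class 2 absent from this subsample
sample_weight[np.isin(y, list(classes_missing))] = 0.0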
