diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
index 215655a287c2d..45af1d7891b2e 100644
--- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
+++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
@@ -79,7 +79,7 @@
     # select up to 5 digit examples that the classifier is most uncertain about
     uncertainty_index = np.argsort(pred_entropies)[::-1]
     uncertainty_index = uncertainty_index[
-        np.in1d(uncertainty_index, unlabeled_indices)
+        np.isin(uncertainty_index, unlabeled_indices)
     ][:5]

     # keep track of indices that we get labels for
diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py
index 95a7274c20f75..637cf8e4fc8d4 100644
--- a/sklearn/datasets/_twenty_newsgroups.py
+++ b/sklearn/datasets/_twenty_newsgroups.py
@@ -319,7 +319,7 @@ def fetch_20newsgroups(
         # Sort the categories to have the ordering of the labels
         labels.sort()
         labels, categories = zip(*labels)
-        mask = np.in1d(data.target, labels)
+        mask = np.isin(data.target, labels)
         data.filenames = data.filenames[mask]
         data.target = data.target[mask]
         # searchsorted to have continuous labels
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 1ef3895fe2818..a2a23b9ec4f3d 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -76,7 +76,7 @@ def _mask_edges_weights(mask, edges, weights=None):
     """Apply a mask to edges (weighted or not)"""
     inds = np.arange(mask.size)
     inds = inds[mask.ravel()]
-    ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds))
+    ind_mask = np.logical_and(np.isin(edges[0], inds), np.isin(edges[1], inds))
     edges = edges[:, ind_mask]
     if weights is not None:
         weights = weights[ind_mask]
diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index 50571a5bd255a..0173e5338d722 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -16,6 +16,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid

 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
@@ -289,7 +290,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
     # we should obtain the statistics of the "cancer" class
     avg_prec_limit = 0.65
     assert display.average_precision < avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) < avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) < avg_prec_limit

     # otherwise we should obtain the statistics of the "not cancer" class
     if constructor_name == "from_estimator":
@@ -308,7 +309,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
     )
     avg_prec_limit = 0.95
     assert display.average_precision > avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) > avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) > avg_prec_limit


 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index dff5b4865a45e..8fd9f96576518 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -11,6 +11,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid


 @pytest.fixture(scope="module")
@@ -293,7 +294,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
     roc_auc_limit = 0.95679

     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

     if constructor_name == "from_estimator":
         display = RocCurveDisplay.from_estimator(
@@ -311,4 +312,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
     )

     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 166c2ce20eb87..a7d4b5ef18d66 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -38,6 +38,7 @@
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.extmath import stable_cumsum
+from ..utils.fixes import trapezoid
 from ..utils.multiclass import type_of_target
 from ..utils.sparsefuncs import count_nonzero
 from ..utils.validation import _check_pos_label_consistency, _check_sample_weight
@@ -104,9 +105,9 @@ def auc(x, y):
         else:
             raise ValueError("x is neither increasing nor decreasing : {}.".format(x))

-    area = direction * np.trapz(y, x)
+    area = direction * trapezoid(y, x)
     if isinstance(area, np.memmap):
-        # Reductions such as .sum used internally in np.trapz do not return a
+        # Reductions such as .sum used internally in trapezoid do not return a
         # scalar by default for numpy.memmap instances contrary to
         # regular numpy.ndarray instances.
         area = area.dtype.type(area)
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 2dceff9b22126..4d30538023abd 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -1973,8 +1973,8 @@ def _iter_indices(self, X, y, groups):

             # these are the indices of classes in the partition
             # invert them into data indices
-            train = np.flatnonzero(np.in1d(group_indices, group_train))
-            test = np.flatnonzero(np.in1d(group_indices, group_test))
+            train = np.flatnonzero(np.isin(group_indices, group_train))
+            test = np.flatnonzero(np.isin(group_indices, group_test))

             yield train, test
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 04c3f1f156fab..50b519118a2b3 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1418,7 +1418,7 @@ def test_grid_search_correct_score_results():
         expected_keys = ("mean_test_score", "rank_test_score") + tuple(
             "split%d_test_score" % cv_i for cv_i in range(n_splits)
         )
-        assert all(np.in1d(expected_keys, result_keys))
+        assert all(np.isin(expected_keys, result_keys))

     cv = StratifiedKFold(n_splits=n_splits)
     n_splits = grid_search.n_splits_
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 151498205dd39..648f11041cfbf 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -987,8 +987,8 @@ def test_group_shuffle_split():
         # First test: no train group is in the test set and vice versa
         l_train_unique = np.unique(l[train])
         l_test_unique = np.unique(l[test])
-        assert not np.any(np.in1d(l[train], l_test_unique))
-        assert not np.any(np.in1d(l[test], l_train_unique))
+        assert not np.any(np.isin(l[train], l_test_unique))
+        assert not np.any(np.isin(l[test], l_train_unique))

         # Second test: train and test add up to all the data
         assert l[train].size + l[test].size == l.size
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 22e65f5062586..9ee664bf8b3a4 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -467,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
             classes = self.classes_

         unique_y = np.unique(y)
-        unique_y_in_classes = np.in1d(unique_y, classes)
+        unique_y_in_classes = np.isin(unique_y, classes)

         if not np.all(unique_y_in_classes):
             raise ValueError(
diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 3008710d3c3dc..41494f2649a01 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -553,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         y = column_or_1d(y)

         # pick out the known labels from y
-        y_in_classes = np.in1d(y, classes)
+        y_in_classes = np.isin(y, classes)
         y_seen = y[y_in_classes]
         indices = np.searchsorted(sorted_class, y_seen)
         indptr = np.hstack((0, np.cumsum(y_in_classes)))
diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py
index 15e0d0d99cfb0..93df0221236b8 100644
--- a/sklearn/tests/test_isotonic.py
+++ b/sklearn/tests/test_isotonic.py
@@ -595,7 +595,7 @@ def test_isotonic_thresholds(increasing):
     # the data is already strictly monotonic which is not the case with
     # this random data)
     assert X_thresholds.shape[0] < X.shape[0]
-    assert np.in1d(X_thresholds, X).all()
+    assert np.isin(X_thresholds, X).all()

     # Output thresholds lie in the range of the training set:
     assert y_thresholds.max() <= y.max()
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index fb3912b27dbfe..b3bf1c2a317ec 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -296,7 +296,7 @@ def is_valid(value):
         diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
         if return_mask:
             if diff.size:
-                valid_mask = np.in1d(values, known_values)
+                valid_mask = np.isin(values, known_values)
             else:
                 valid_mask = np.ones(len(values), dtype=bool)
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py
index a95fe4ccbd001..941ee55424c0b 100644
--- a/sklearn/utils/class_weight.py
+++ b/sklearn/utils/class_weight.py
@@ -57,7 +57,7 @@ def compute_class_weight(class_weight, *, classes, y):
         # Find the weight of each class as present in y.
         le = LabelEncoder()
         y_ind = le.fit_transform(y)
-        if not all(np.in1d(classes, le.classes_)):
+        if not all(np.isin(classes, le.classes_)):
             raise ValueError("classes should have valid labels that are in y")

         recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))
@@ -195,7 +195,7 @@ def compute_sample_weight(class_weight, y, *, indices=None):

         if classes_missing:
             # Make missing classes' weight zero
-            weight_k[np.in1d(y_full, list(classes_missing))] = 0.0
+            weight_k[np.isin(y_full, list(classes_missing))] = 0.0

         expanded_class_weight.append(weight_k)
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index aeb01f91590a2..e545054bff96c 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -200,3 +200,10 @@ def _contents(data_module):
     from numpy.exceptions import ComplexWarning, VisibleDeprecationWarning
 else:
     from numpy import ComplexWarning, VisibleDeprecationWarning  # type: ignore  # noqa
+
+
+# TODO: Remove when Scipy 1.6 is the minimum supported version
+try:
+    from scipy.integrate import trapezoid  # type: ignore  # noqa
+except ImportError:
+    from scipy.integrate import trapz as trapezoid  # type: ignore  # noqa
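Note: the snippet below is not part of the patch. It is a minimal sketch of the two compatibility moves the diff makes, importing trapezoid with a fallback to the pre-1.6 SciPy name, and using np.isin in place of the deprecated np.in1d, so the intent of the mechanical replacements above can be checked in isolation. The sample arrays are illustrative only.

import numpy as np

# Same fallback pattern as the sklearn/utils/fixes.py hunk above:
# scipy.integrate.trapezoid exists from SciPy 1.6 on; older releases
# only ship it under the legacy name `trapz`.
try:
    from scipy.integrate import trapezoid
except ImportError:
    from scipy.integrate import trapz as trapezoid

# np.isin is the replacement NumPy recommends for the deprecated np.in1d;
# for the 1-D inputs used throughout this patch the two return the same
# boolean mask.
element = np.array([0, 1, 2, 5, 0])
test_elements = np.array([1, 2, 4])
print(np.isin(element, test_elements))  # [False  True  True False False]

# trapezoid(y, x) computes the same composite trapezoidal rule as the
# np.trapz calls it replaces, e.g. integrating x**2 over [0, 1]:
x = np.linspace(0.0, 1.0, 101)
print(trapezoid(x**2, x))  # ~0.33335 (exact value is 1/3)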