From cdfb17c4795e956b0d52b908c733dec3f1ba5349 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 3 Oct 2024 17:24:09 +0200 Subject: [PATCH 1/4] MAINT handle the deprecation of sokalmichener in scipy cdist/pdist --- sklearn/metrics/_dist_metrics.pyx.tp | 11 ++++++++- sklearn/metrics/pairwise.py | 8 +++++-- sklearn/metrics/tests/test_dist_metrics.py | 28 +++++++++++++++++++--- sklearn/metrics/tests/test_pairwise.py | 3 +++ sklearn/neighbors/_base.py | 4 +++- sklearn/neighbors/tests/test_neighbors.py | 6 +++++ 6 files changed, 53 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index c317605f94d14..b7d3d1f4d86a6 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -42,15 +42,24 @@ BOOL_METRICS = [ "dice", "rogerstanimoto", "russellrao", - "sokalmichener", "sokalsneath", ] +DEPRECATED_METRICS = [] +if sp_base_version < parse_version("1.17"): + # Deprecated in SciPy 1.15 and removed in SciPy 1.17 + BOOL_METRICS += ["sokalmichener"] +if sp_base_version >= parse_version("1.15"): + DEPRECATED_METRICS.append("sokalmichener") if sp_base_version < parse_version("1.11"): # Deprecated in SciPy 1.9 and removed in SciPy 1.11 BOOL_METRICS += ["kulsinski"] +if sp_base_version >= parse_version("1.9"): + DEPRECATED_METRICS.append("kulsinski") if sp_base_version < parse_version("1.9"): # Deprecated in SciPy 1.0 and removed in SciPy 1.9 BOOL_METRICS += ["matching"] +if sp_base_version >= parse_version("1.0"): + DEPRECATED_METRICS.append("matching") def get_valid_metric_ids(L): """Given an iterable of metric class names or class identifiers, diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 8b4f35a8195c2..9b62a0f73f130 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -693,7 +693,6 @@ def _argmin_reduce(dist, start): "rogerstanimoto", "russellrao", "seuclidean", - "sokalmichener", "sokalsneath", "sqeuclidean", 
"yule", @@ -701,6 +700,9 @@ def _argmin_reduce(dist, start): "nan_euclidean", "haversine", ] +if sp_base_version < parse_version("1.17"): # pragma: no cover + # Deprecated in SciPy 1.15 and removed in SciPy 1.17 + _VALID_METRICS += ["sokalmichener"] if sp_base_version < parse_version("1.11"): # pragma: no cover # Deprecated in SciPy 1.9 and removed in SciPy 1.11 _VALID_METRICS += ["kulsinski"] @@ -2482,10 +2484,12 @@ def pairwise_distances( "jaccard", "rogerstanimoto", "russellrao", - "sokalmichener", "sokalsneath", "yule", ] +if sp_base_version < parse_version("1.17"): + # Deprecated in SciPy 1.15 and removed in SciPy 1.17 + PAIRWISE_BOOLEAN_FUNCTIONS += ["sokalmichener"] if sp_base_version < parse_version("1.11"): # Deprecated in SciPy 1.9 and removed in SciPy 1.11 PAIRWISE_BOOLEAN_FUNCTIONS += ["kulsinski"] diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index baaf447d3909b..5690274e27982 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -9,11 +9,16 @@ from sklearn.metrics import DistanceMetric from sklearn.metrics._dist_metrics import ( BOOL_METRICS, + DEPRECATED_METRICS, DistanceMetric32, DistanceMetric64, ) from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_allclose, create_memmap_backed_data +from sklearn.utils._testing import ( + assert_allclose, + create_memmap_backed_data, + ignore_warnings, +) from sklearn.utils.fixes import CSR_CONTAINERS, parse_version, sp_version @@ -112,7 +117,15 @@ def test_cdist(metric_param_grid, X, Y, csr_container): ) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_cdist_bool_metric(metric, X_bool, Y_bool, csr_container): - D_scipy_cdist = cdist(X_bool, Y_bool, metric) + if metric in DEPRECATED_METRICS: + with ignore_warnings(category=DeprecationWarning): + # Some metrics can be deprecated depending on the scipy version. 
+ # But if they are present, we still want to test whether + # scikit-learn gives the same result, whether or not they are + # deprecated. + D_scipy_cdist = cdist(X_bool, Y_bool, metric) + else: + D_scipy_cdist = cdist(X_bool, Y_bool, metric) dm = DistanceMetric.get_metric(metric) D_sklearn = dm.pairwise(X_bool, Y_bool) @@ -219,7 +232,16 @@ def test_distance_metrics_dtype_consistency(metric_param_grid): @pytest.mark.parametrize("X_bool", [X_bool, X_bool_mmap]) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_pdist_bool_metrics(metric, X_bool, csr_container): - D_scipy_pdist = cdist(X_bool, X_bool, metric) + if metric in DEPRECATED_METRICS: + with ignore_warnings(category=DeprecationWarning): + # Some metrics can be deprecated depending on the scipy version. + # But if they are present, we still want to test whether + # scikit-learn gives the same result, whether or not they are + # deprecated. + D_scipy_pdist = cdist(X_bool, X_bool, metric) + else: + D_scipy_pdist = cdist(X_bool, X_bool, metric) + dm = DistanceMetric.get_metric(metric) D_sklearn = dm.pairwise(X_bool) assert_allclose(D_sklearn, D_scipy_pdist) diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index b3f8146b275c5..f93dbcd6d8288 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -212,6 +212,9 @@ def test_pairwise_distances_for_sparse_data( pairwise_distances(X, Y_sparse, metric="minkowski") +# Some scipy metrics are deprecated (depending on the scipy version) but we +# still want to test them. 
+@ignore_warnings(category=DeprecationWarning) @pytest.mark.parametrize("metric", PAIRWISE_BOOLEAN_FUNCTIONS) def test_pairwise_boolean_distance(metric): # test that we convert to boolean arrays for boolean distances diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index e8647d1a163a1..1925e0dbc758c 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -48,11 +48,13 @@ "rogerstanimoto", "russellrao", "seuclidean", - "sokalmichener", "sokalsneath", "sqeuclidean", "yule", ] +if sp_base_version < parse_version("1.17"): + # Deprecated in SciPy 1.15 and removed in SciPy 1.17 + SCIPY_METRICS += ["sokalmichener"] if sp_base_version < parse_version("1.11"): # Deprecated in SciPy 1.9 and removed in SciPy 1.11 SCIPY_METRICS += ["kulsinski"] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 1c434ae8d59d4..07c816b107675 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1721,6 +1721,9 @@ def test_neighbors_metrics( assert_array_equal(ball_tree_idx, kd_tree_idx) +# Some scipy metrics are deprecated (depending on the scipy version) but we +# still want to test them. +@ignore_warnings(category=DeprecationWarning) @pytest.mark.parametrize( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) ) @@ -2243,6 +2246,9 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): assert model._fit_method == expected_algo +# Some scipy metrics are deprecated (depending on the scipy version) but we +# still want to test them. 
+@ignore_warnings(category=DeprecationWarning) @pytest.mark.parametrize( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) ) From bbd87e6251bb7d7d7d15b76331a60466b9322796 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 3 Oct 2024 17:25:26 +0200 Subject: [PATCH 2/4] Trigger [scipy-dev] CI From c2610238b264441a4dd6c49cc3d0339d8fc7d220 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 8 Oct 2024 18:55:45 +0200 Subject: [PATCH 3/4] Update sklearn/neighbors/tests/test_neighbors.py Co-authored-by: Thomas J. Fan --- sklearn/neighbors/tests/test_neighbors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 07c816b107675..e46ad4da4bff4 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1721,6 +1721,7 @@ def test_neighbors_metrics( assert_array_equal(ball_tree_idx, kd_tree_idx) +# TODO: Remove ignore_warnings when minimum supported SciPy version is 1.17 # Some scipy metrics are deprecated (depending on the scipy version) but we # still want to test them. @ignore_warnings(category=DeprecationWarning) From 748bc681882c9542fc8bfb652274a98c8005cf82 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 8 Oct 2024 18:55:52 +0200 Subject: [PATCH 4/4] Update sklearn/neighbors/tests/test_neighbors.py Co-authored-by: Thomas J. 
Fan --- sklearn/neighbors/tests/test_neighbors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index e46ad4da4bff4..cb6acb65cb1cc 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -2247,6 +2247,7 @@ def test_auto_algorithm(X, metric, metric_params, expected_algo): assert model._fit_method == expected_algo +# TODO: Remove ignore_warnings when minimum supported SciPy version is 1.17 # Some scipy metrics are deprecated (depending on the scipy version) but we # still want to test them. @ignore_warnings(category=DeprecationWarning)