diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index e6a57bd551317..4994432df8034 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -110,6 +110,13 @@ Changelog
   object in the parameter grid if it's an estimator.
   :pr:`26786` by `Adrin Jalali`_.
 
+:mod:`sklearn.neighbors`
+........................
+
+- |Fix| Neighbors based estimators now correctly work when `metric="minkowski"` and the
+  metric parameter `p` is in the range `0 < p < 1`, regardless of the `dtype` of `X`.
+  :pr:`26760` by :user:`Shreesha Kumar Bhat <Shreesha-Kumar-Bhat>`.
+
 :mod:`sklearn.tree`
 ...................
 
diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp
index bc54e51a7511a..43497c9c50cee 100644
--- a/sklearn/metrics/_dist_metrics.pyx.tp
+++ b/sklearn/metrics/_dist_metrics.pyx.tp
@@ -1271,19 +1271,27 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}):
 
     Parameters
     ----------
-    p : int
+    p : float
         The order of the p-norm of the difference (see above).
+
+        .. versionchanged:: 1.4.0
+            Minkowski distance allows `p` to be `0<p<1`.
+
     w : (N,) array-like (optional)
         The weights of each of the N dimensions of the vectors.
 
     Notes
     -----
-    Minkowski Distance requires p >= 1 and finite. For p = infinity,
-    use ChebyshevDistance.
+    Minkowski Distance requires p > 0 and finite.
+    When :math:`p \in (0,1)`, it isn't a true metric but is permissible when
+    the triangular inequality isn't necessary.
+    For p = infinity, use ChebyshevDistance.
     Note that for p=1, ManhattanDistance is more efficient, and for p=2,
     EuclideanDistance is more efficient.
+
     """
     def __init__(self, p, w=None):
-        if p < 1:
-            raise ValueError("p must be greater than 1")
+        if p <= 0:
+            raise ValueError("p must be greater than 0")
         elif np.isinf(p):
             raise ValueError("MinkowskiDistance requires finite p. "
                              "For p=inf, use ChebyshevDistance.")
diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py
index 16aa5c569b161..be5d12f0414b8 100644
--- a/sklearn/metrics/tests/test_dist_metrics.py
+++ b/sklearn/metrics/tests/test_dist_metrics.py
@@ -15,6 +15,7 @@
 )
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import assert_allclose, create_memmap_backed_data
+from sklearn.utils.fixes import parse_version, sp_version
 
 
 def dist_func(x1, x2, p):
@@ -42,18 +43,17 @@ def dist_func(x1, x2, p):
 V = rng.random_sample((d, d))
 VI = np.dot(V, V.T)
 
-
 METRICS_DEFAULT_PARAMS = [
     ("euclidean", {}),
     ("cityblock", {}),
-    ("minkowski", dict(p=(1, 1.5, 2, 3))),
+    ("minkowski", dict(p=(0.5, 1, 1.5, 2, 3))),
     ("chebyshev", {}),
     ("seuclidean", dict(V=(rng.random_sample(d),))),
     ("mahalanobis", dict(VI=(VI,))),
     ("hamming", {}),
     ("canberra", {}),
     ("braycurtis", {}),
-    ("minkowski", dict(p=(1, 1.5, 3), w=(rng.random_sample(d),))),
+    ("minkowski", dict(p=(0.5, 1, 1.5, 3), w=(rng.random_sample(d),))),
 ]
 
 
@@ -76,6 +76,13 @@ def test_cdist(metric_param_grid, X, Y):
             # with scipy
             rtol_dict = {"rtol": 1e-6}
 
+        # TODO: Remove when scipy minimum version >= 1.7.0
+        # scipy supports 0<p<1 for minkowski metric with scipy >= 1.7.0
+        if metric == "minkowski":
+            p = kwargs["p"]
+            if sp_version < parse_version("1.7.0") and p < 1:
+                pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
+
@@ -120,6 +127,13 @@ def test_pdist(metric_param_grid, X):
             # with scipy
             rtol_dict = {"rtol": 1e-6}
 
+        # TODO: Remove when scipy minimum version >= 1.7.0
+        # scipy supports 0<p<1 for minkowski metric with scipy >= 1.7.0
+        if metric == "minkowski":
+            p = kwargs["p"]
+            if sp_version < parse_version("1.7.0") and p < 1:
+                pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
+