diff --git a/doc/glossary.rst b/doc/glossary.rst index 010f16a361531..2b4c6af0d1866 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -644,9 +644,8 @@ General Concepts Note that for most distance metrics, we rely on implementations from :mod:`scipy.spatial.distance`, but may reimplement for efficiency in - our context. The :mod:`neighbors` module also duplicates some metric - implementations for integration with efficient binary tree search data - structures. + our context. The :class:`metrics.DistanceMetric` interface is used to implement + distance metrics for integration with efficient neighbors search. pd A shorthand for `Pandas <https://pandas.pydata.org>`_ due to the @@ -1023,7 +1022,7 @@ such as: Further examples: -* :class:`neighbors.DistanceMetric` +* :class:`metrics.DistanceMetric` * :class:`gaussian_process.kernels.Kernel` * ``tree.Criterion`` diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 72b67b23e8dc3..b7000bcf7cbb2 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1058,6 +1058,16 @@ further details. metrics.consensus_score +Distance metrics +---------------- + +.. currentmodule:: sklearn + +.. autosummary:: + :toctree: generated/ + :template: class.rst + + metrics.DistanceMetric Pairwise metrics ---------------- @@ -1317,7 +1327,6 @@ Model validation :template: class.rst neighbors.BallTree - neighbors.DistanceMetric neighbors.KDTree neighbors.KernelDensity neighbors.KNeighborsClassifier diff --git a/doc/modules/density.rst b/doc/modules/density.rst index 115d318183577..9e542b803ef68 100644 --- a/doc/modules/density.rst +++ b/doc/modules/density.rst @@ -136,9 +136,9 @@ The form of these kernels is as follows: :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` The kernel density estimator can be used with any of the valid distance -metrics (see :class:`~sklearn.neighbors.DistanceMetric` for a list of available metrics), though -the results are properly normalized only for the Euclidean metric. 
One -particularly useful metric is the +metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of +available metrics), though the results are properly normalized only +for the Euclidean metric. One particularly useful metric is the `Haversine distance <https://en.wikipedia.org/wiki/Haversine_formula>`_ which measures the angular distance between points on a sphere. Here is an example of using a kernel density estimate for a visualization diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 93300bf67d4a5..1a9f773ce08df 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -60,6 +60,15 @@ Changelog message when the solver does not support sparse matrices with int64 indices. :pr:`21093` by `Tom Dupre la Tour`_. +:mod:`sklearn.metrics` +...................... + +- |API| :class:`metrics.DistanceMetric` has been moved from + :mod:`sklearn.neighbors` to :mod:`sklearn.metrics`. + Using `neighbors.DistanceMetric` for imports is still valid for + backward compatibility, but this alias will be removed in 1.3. + :pr:`21177` by :user:`Julien Jerphanion <jjerphan>`. + :mod:`sklearn.model_selection` .............................. 
diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 6606f370b81eb..70b3a5028169b 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -16,8 +16,8 @@ from ..base import BaseEstimator, ClusterMixin from ..metrics.pairwise import paired_distances -from ..neighbors import DistanceMetric -from ..neighbors._dist_metrics import METRIC_MAPPING +from ..metrics import DistanceMetric +from ..metrics._dist_metrics import METRIC_MAPPING from ..utils import check_array from ..utils._fast_dict import IntFloatDict from ..utils.fixes import _astype_copy_false diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx index 2a58757ce327d..11ea3294c086a 100644 --- a/sklearn/cluster/_hierarchical_fast.pyx +++ b/sklearn/cluster/_hierarchical_fast.pyx @@ -13,7 +13,7 @@ ctypedef np.int8_t INT8 np.import_array() -from ..neighbors._dist_metrics cimport DistanceMetric +from ..metrics._dist_metrics cimport DistanceMetric from ..utils._fast_dict cimport IntFloatDict # C++ @@ -236,8 +236,8 @@ def max_merge(IntFloatDict a, IntFloatDict b, def average_merge(IntFloatDict a, IntFloatDict b, np.ndarray[ITYPE_t, ndim=1] mask, ITYPE_t n_a, ITYPE_t n_b): - """Merge two IntFloatDicts with the average strategy: when the - same key is present in the two dicts, the weighted average of the two + """Merge two IntFloatDicts with the average strategy: when the + same key is present in the two dicts, the weighted average of the two values is used. 
Parameters @@ -290,13 +290,13 @@ def average_merge(IntFloatDict a, IntFloatDict b, ############################################################################### -# An edge object for fast comparisons +# An edge object for fast comparisons cdef class WeightedEdge: cdef public ITYPE_t a cdef public ITYPE_t b cdef public DTYPE_t weight - + def __init__(self, DTYPE_t weight, ITYPE_t a, ITYPE_t b): self.weight = weight self.a = a @@ -326,7 +326,7 @@ cdef class WeightedEdge: return self.weight > other.weight elif op == 5: return self.weight >= other.weight - + def __repr__(self): return "%s(weight=%f, a=%i, b=%i)" % (self.__class__.__name__, self.weight, @@ -475,7 +475,7 @@ def mst_linkage_core( dist_metric: DistanceMetric A DistanceMetric object conforming to the API from - ``sklearn.neighbors._dist_metrics.pxd`` that will be + ``sklearn.metrics._dist_metrics.pxd`` that will be used to compute distances. Returns @@ -534,4 +534,3 @@ def mst_linkage_core( current_node = new_node return np.array(result) - diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 92f92dc3736e3..3525643383c26 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -17,7 +17,7 @@ from scipy.sparse.csgraph import connected_components from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.neighbors.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS +from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data from sklearn.utils._testing import assert_array_almost_equal from sklearn.utils._testing import ignore_warnings @@ -31,6 +31,7 @@ _fix_connectivity, ) from sklearn.feature_extraction.image import grid_to_graph +from sklearn.metrics import DistanceMetric from sklearn.metrics.pairwise import ( PAIRED_DISTANCES, cosine_distances, @@ -38,7 +39,7 @@ pairwise_distances, ) from 
sklearn.metrics.cluster import normalized_mutual_info_score -from sklearn.neighbors import kneighbors_graph, DistanceMetric +from sklearn.neighbors import kneighbors_graph from sklearn.cluster._hierarchical_fast import ( average_merge, max_merge, diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 46958ea4ef7f8..e4339229c5b64 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -36,6 +36,8 @@ from ._classification import brier_score_loss from ._classification import multilabel_confusion_matrix +from ._dist_metrics import DistanceMetric + from . import cluster from .cluster import adjusted_mutual_info_score from .cluster import adjusted_rand_score @@ -115,6 +117,7 @@ "davies_bouldin_score", "DetCurveDisplay", "det_curve", + "DistanceMetric", "euclidean_distances", "explained_variance_score", "f1_score", diff --git a/sklearn/neighbors/_dist_metrics.pxd b/sklearn/metrics/_dist_metrics.pxd similarity index 87% rename from sklearn/neighbors/_dist_metrics.pxd rename to sklearn/metrics/_dist_metrics.pxd index 5b223f8c6d8a8..61bb4fb2fe011 100644 --- a/sklearn/neighbors/_dist_metrics.pxd +++ b/sklearn/metrics/_dist_metrics.pxd @@ -1,14 +1,12 @@ -#!python -#cython: boundscheck=False -#cython: wraparound=False -#cython: cdivision=True +# cython: boundscheck=False +# cython: cdivision=True +# cython: initializedcheck=False +# cython: wraparound=False -cimport cython cimport numpy as np -from libc.math cimport fabs, sqrt, exp, cos, pow +from libc.math cimport sqrt, exp -from ._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t -from ._typedefs import DTYPE, ITYPE +from ..utils._typedefs cimport DTYPE_t, ITYPE_t ###################################################################### # Inline distance functions @@ -60,7 +58,7 @@ cdef class DistanceMetric: cdef DTYPE_t dist(self, const DTYPE_t* x1, const DTYPE_t* x2, ITYPE_t size) nogil except -1 - cdef DTYPE_t rdist(self, DTYPE_t* x1, DTYPE_t* x2, + cdef DTYPE_t rdist(self, const DTYPE_t* 
x1, const DTYPE_t* x2, ITYPE_t size) nogil except -1 cdef int pdist(self, const DTYPE_t[:, ::1] X, DTYPE_t[:, ::1] D) except -1 diff --git a/sklearn/neighbors/_dist_metrics.pyx b/sklearn/metrics/_dist_metrics.pyx similarity index 96% rename from sklearn/neighbors/_dist_metrics.pyx rename to sklearn/metrics/_dist_metrics.pyx index db93263ee8eda..a8fb4c45ddd0c 100644 --- a/sklearn/neighbors/_dist_metrics.pyx +++ b/sklearn/metrics/_dist_metrics.pyx @@ -1,8 +1,7 @@ -#!python -#cython: boundscheck=False -#cython: wraparound=False -#cython: initializedcheck=False -#cython: cdivision=True +# cython: boundscheck=False +# cython: cdivision=True +# cython: initializedcheck=False +# cython: wraparound=False # By Jake Vanderplas (2013) # written for the scikit-learn project @@ -19,7 +18,7 @@ cdef extern from "arrayobject.h": int typenum, void* data) -cdef inline np.ndarray _buffer_to_ndarray(DTYPE_t* x, np.npy_intp n): +cdef inline np.ndarray _buffer_to_ndarray(const DTYPE_t* x, np.npy_intp n): # Wrap a memory buffer with an ndarray. Warning: this is not robust. # In particular, if x is deallocated before the returned array goes # out of scope, this could cause memory errors. 
Since there is not @@ -33,8 +32,8 @@ cdef inline np.ndarray _buffer_to_ndarray(DTYPE_t* x, np.npy_intp n): from libc.math cimport fabs, sqrt, exp, pow, cos, sin, asin cdef DTYPE_t INF = np.inf -from ._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t, DTYPECODE -from ._typedefs import DTYPE, ITYPE +from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t, DTYPECODE +from ..utils._typedefs import DTYPE, ITYPE ###################################################################### @@ -98,7 +97,7 @@ cdef class DistanceMetric: Examples -------- - >>> from sklearn.neighbors import DistanceMetric + >>> from sklearn.metrics import DistanceMetric >>> dist = DistanceMetric.get_metric('euclidean') >>> X = [[0, 1, 2], [3, 4, 5]] @@ -291,14 +290,13 @@ cdef class DistanceMetric: cdef DTYPE_t rdist(self, const DTYPE_t* x1, const DTYPE_t* x2, ITYPE_t size) nogil except -1: - """Compute the reduced distance between vectors x1 and x2. + """Compute the rank-preserving surrogate distance between vectors x1 and x2. This can optionally be overridden in a base class. - The reduced distance is any measure that yields the same rank as the - distance, but is more efficient to compute. For example, for the - Euclidean metric, the reduced distance is the squared-euclidean - distance. + The rank-preserving surrogate distance is any measure that yields the same + rank as the distance, but is more efficient to compute. For example, for the + Euclidean metric, the surrogate distance is the squared-euclidean distance. 
""" return self.dist(x1, x2, size) @@ -323,25 +321,24 @@ cdef class DistanceMetric: return 0 cdef DTYPE_t _rdist_to_dist(self, DTYPE_t rdist) nogil except -1: - """Convert the reduced distance to the distance""" + """Convert the rank-preserving surrogate distance to the distance""" return rdist cdef DTYPE_t _dist_to_rdist(self, DTYPE_t dist) nogil except -1: - """Convert the distance to the reduced distance""" + """Convert the distance to the rank-preserving surrogate distance""" return dist def rdist_to_dist(self, rdist): - """Convert the Reduced distance to the true distance. + """Convert the rank-preserving surrogate distance to the distance. - The reduced distance, defined for some metrics, is a computationally - more efficient measure which preserves the rank of the true distance. - For example, in the Euclidean distance metric, the reduced distance - is the squared-euclidean distance. + The surrogate distance is any measure that yields the same rank as the + distance, but is more efficient to compute. For example, for the + Euclidean metric, the surrogate distance is the squared-euclidean distance. Parameters ---------- rdist : double - Reduced distance. + Surrogate distance. Returns ------- @@ -351,12 +348,11 @@ cdef class DistanceMetric: return rdist def dist_to_rdist(self, dist): - """Convert the true distance to the reduced distance. + """Convert the true distance to the rank-preserving surrogate distance. - The reduced distance, defined for some metrics, is a computationally - more efficient measure which preserves the rank of the true distance. - For example, in the Euclidean distance metric, the reduced distance - is the squared-euclidean distance. + The surrogate distance is any measure that yields the same rank as the + distance, but is more efficient to compute. For example, for the + Euclidean metric, the surrogate distance is the squared-euclidean distance. 
Parameters ---------- @@ -366,7 +362,7 @@ cdef class DistanceMetric: Returns ------- double - Reduced distance. + Surrogate distance. """ return dist @@ -519,7 +515,7 @@ cdef class ChebyshevDistance(DistanceMetric): Examples -------- - >>> from sklearn.neighbors.dist_metrics import DistanceMetric + >>> from sklearn.metrics import DistanceMetric >>> dist = DistanceMetric.get_metric('chebyshev') >>> X = [[0, 1, 2], ... [3, 4, 5]] diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index d493ad68603ea..51cf80614cb3c 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -780,7 +780,7 @@ def haversine_distances(X, Y=None): array([[ 0. , 11099.54035582], [11099.54035582, 0. ]]) """ - from ..neighbors import DistanceMetric + from ..metrics import DistanceMetric return DistanceMetric.get_metric("haversine").pairwise(X, Y) diff --git a/sklearn/metrics/setup.py b/sklearn/metrics/setup.py index df1a1caad17e0..69925a3590be6 100644 --- a/sklearn/metrics/setup.py +++ b/sklearn/metrics/setup.py @@ -1,4 +1,5 @@ import os +import numpy as np from numpy.distutils.misc_util import Configuration @@ -18,6 +19,13 @@ def configuration(parent_package="", top_path=None): "_pairwise_fast", sources=["_pairwise_fast.pyx"], libraries=libraries ) + config.add_extension( + "_dist_metrics", + sources=["_dist_metrics.pyx"], + include_dirs=[np.get_include(), os.path.join(np.get_include(), "numpy")], + libraries=libraries, + ) + config.add_subpackage("tests") return config diff --git a/sklearn/neighbors/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py similarity index 95% rename from sklearn/neighbors/tests/test_dist_metrics.py rename to sklearn/metrics/tests/test_dist_metrics.py index 08298f087c216..9440abba6f848 100644 --- a/sklearn/neighbors/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -7,8 +7,7 @@ import pytest from scipy.spatial.distance import cdist -from sklearn.neighbors import 
DistanceMetric -from sklearn.neighbors import BallTree +from sklearn.metrics import DistanceMetric from sklearn.utils import check_random_state from sklearn.utils._testing import create_memmap_backed_data from sklearn.utils.fixes import sp_version, parse_version @@ -230,16 +229,6 @@ def test_pyfunc_metric(): assert_array_almost_equal(D1_pkl, D2_pkl) -def test_bad_pyfunc_metric(): - def wrong_distance(x, y): - return "1" - - X = np.ones((5, 2)) - msg = "Custom distance function must accept two vectors" - with pytest.raises(TypeError, match=msg): - BallTree(X, metric=wrong_distance) - - def test_input_data_size(): # Regression test for #6288 # Previously, a metric requiring a particular input dimension would fail diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 8a0934eecf142..340910008f75c 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -5,7 +5,7 @@ from ._ball_tree import BallTree from ._kd_tree import KDTree -from ._dist_metrics import DistanceMetric +from ._distance_metric import DistanceMetric from ._graph import kneighbors_graph, radius_neighbors_graph from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer from ._unsupervised import NearestNeighbors diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi index 9f90414994550..f25da86e2148c 100644 --- a/sklearn/neighbors/_binary_tree.pxi +++ b/sklearn/neighbors/_binary_tree.pxi @@ -153,11 +153,16 @@ import numpy as np import warnings from ..utils import check_array -from ._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t -from ._typedefs import DTYPE, ITYPE +from sklearn.utils._typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t +from sklearn.utils._typedefs import DTYPE, ITYPE -from ._dist_metrics cimport (DistanceMetric, euclidean_dist, euclidean_rdist, - euclidean_dist_to_rdist, euclidean_rdist_to_dist) +from ..metrics._dist_metrics cimport ( + DistanceMetric, + euclidean_dist, + euclidean_rdist, + 
euclidean_dist_to_rdist, + euclidean_rdist_to_dist, +) from ._partition_nodes cimport partition_node_indices @@ -878,7 +883,7 @@ def newObj(obj): ###################################################################### # define the reverse mapping of VALID_METRICS -from ._dist_metrics import get_valid_metric_ids +from sklearn.metrics._dist_metrics import get_valid_metric_ids VALID_METRIC_IDS = get_valid_metric_ids(VALID_METRICS) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index d616eaa2f32a8..056e980e63ebc 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -67,8 +67,8 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase): metric : str or callable, default='minkowski' The distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean - metric. See the documentation of :class:`DistanceMetric` for a - list of available metrics. + metric. For a list of available metrics, see the documentation of + :class:`~sklearn.metrics.DistanceMetric`. If metric is "precomputed", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only "nonzero" elements may be considered neighbors. @@ -344,8 +344,8 @@ class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, Neighbors metric : str or callable, default='minkowski' Distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean - metric. See the documentation of :class:`DistanceMetric` for a - list of available metrics. + metric. For a list of available metrics, see the documentation of + :class:`~sklearn.metrics.DistanceMetric`. If metric is "precomputed", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only "nonzero" elements may be considered neighbors. 
diff --git a/sklearn/neighbors/_distance_metric.py b/sklearn/neighbors/_distance_metric.py new file mode 100644 index 0000000000000..c973425d2e7b6 --- /dev/null +++ b/sklearn/neighbors/_distance_metric.py @@ -0,0 +1,20 @@ +# TODO: Remove this file in 1.3 +import warnings + +from ..metrics import DistanceMetric as _DistanceMetric + + +class DistanceMetric(_DistanceMetric): + @classmethod + def _warn(cls): + warnings.warn( + "sklearn.neighbors.DistanceMetric has been moved " + "to sklearn.metrics.DistanceMetric in 1.0. " + "This import path will be removed in 1.3", + category=FutureWarning, + ) + + @classmethod + def get_metric(cls, metric, **kwargs): + DistanceMetric._warn() + return _DistanceMetric.get_metric(metric, **kwargs) diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index e6fdeffe3b291..9afa37b71a808 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -65,10 +65,11 @@ def kneighbors_graph( between neighbors according to the given metric. metric : str, default='minkowski' - The distance metric used to calculate the k-Neighbors for each sample - point. The DistanceMetric class gives a list of available metrics. - The default distance is 'euclidean' ('minkowski' metric with the p - param equal to 2.) + The distance metric to use for the tree. The default metric is + minkowski, and with p=2 is equivalent to the standard Euclidean + metric. + For a list of available metrics, see the documentation of + :class:`~sklearn.metrics.DistanceMetric`. p : int, default=2 Power parameter for the Minkowski metric. When p = 1, this is @@ -157,10 +158,11 @@ def radius_neighbors_graph( between neighbors according to the given metric. metric : str, default='minkowski' - The distance metric used to calculate the neighbors within a - given radius for each sample point. The DistanceMetric class - gives a list of available metrics. The default distance is - 'euclidean' ('minkowski' metric with the param equal to 2.) 
+ The distance metric to use for the tree. The default metric is + minkowski, and with p=2 is equivalent to the standard Euclidean + metric. + For a list of available metrics, see the documentation of + :class:`~sklearn.metrics.DistanceMetric`. p : int, default=2 Power parameter for the Minkowski metric. When p = 1, this is diff --git a/sklearn/neighbors/_partition_nodes.pxd b/sklearn/neighbors/_partition_nodes.pxd index 522e826632824..94b02002d7a1e 100644 --- a/sklearn/neighbors/_partition_nodes.pxd +++ b/sklearn/neighbors/_partition_nodes.pxd @@ -1,4 +1,4 @@ -from ._typedefs cimport DTYPE_t, ITYPE_t +from ..utils._typedefs cimport DTYPE_t, ITYPE_t cdef int partition_node_indices( DTYPE_t *data, diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index 6b6eec1a3112b..440ac41eb71d5 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -41,8 +41,8 @@ class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase): metric : str or callable, default='minkowski' The distance metric to use for the tree. The default metric is minkowski, and with p=2 is equivalent to the standard Euclidean - metric. See the documentation of :class:`DistanceMetric` for a - list of available metrics. + metric. For a list of available metrics, see the documentation of + :class:`~sklearn.metrics.DistanceMetric`. If metric is "precomputed", X is assumed to be a distance matrix and must be square during fit. X may be a :term:`sparse graph`, in which case only "nonzero" elements may be considered neighbors. 
diff --git a/sklearn/neighbors/setup.py b/sklearn/neighbors/setup.py index 85305efc29c78..aa19ba501b18d 100644 --- a/sklearn/neighbors/setup.py +++ b/sklearn/neighbors/setup.py @@ -32,19 +32,6 @@ def configuration(parent_package="", top_path=None): libraries=libraries, ) - config.add_extension( - "_dist_metrics", - sources=["_dist_metrics.pyx"], - include_dirs=[numpy.get_include(), os.path.join(numpy.get_include(), "numpy")], - libraries=libraries, - ) - - config.add_extension( - "_typedefs", - sources=["_typedefs.pyx"], - include_dirs=[numpy.get_include()], - libraries=libraries, - ) config.add_extension( "_quad_tree", sources=["_quad_tree.pyx"], diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index c751539f2a1ae..41ccff25a260e 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -4,7 +4,6 @@ import pytest from numpy.testing import assert_array_almost_equal from sklearn.neighbors._ball_tree import BallTree -from sklearn.neighbors import DistanceMetric from sklearn.utils import check_random_state from sklearn.utils.validation import check_array from sklearn.utils._testing import _convert_container @@ -40,6 +39,8 @@ def brute_force_neighbors(X, Y, k, metric, **kwargs): + from sklearn.metrics import DistanceMetric + X, Y = check_array(X), check_array(Y) D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X) ind = np.argsort(D, axis=1)[:, :k] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index a9592ff9f2c51..aa9c624cdb3cc 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1823,3 +1823,15 @@ def test_pairwise_deprecated(NearestNeighbors): msg = r"Attribute `_pairwise` was deprecated in version 0\.24" with pytest.warns(FutureWarning, match=msg): nn._pairwise + + +# TODO: Remove in 1.3 +def test_neighbors_distance_metric_deprecation(): + from 
sklearn.neighbors import DistanceMetric + from sklearn.metrics import DistanceMetric as ActualDistanceMetric + + msg = r"This import path will be removed in 1\.3" + with pytest.warns(FutureWarning, match=msg): + dist_metric = DistanceMetric.get_metric("euclidean") + + assert isinstance(dist_metric, ActualDistanceMetric) diff --git a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py index de34b4d230171..e043ffb730708 100644 --- a/sklearn/neighbors/tests/test_neighbors_tree.py +++ b/sklearn/neighbors/tests/test_neighbors_tree.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from sklearn.neighbors import DistanceMetric +from sklearn.metrics import DistanceMetric from sklearn.neighbors._ball_tree import ( BallTree, kernel_norm, diff --git a/sklearn/neighbors/_typedefs.pxd b/sklearn/utils/_typedefs.pxd similarity index 100% rename from sklearn/neighbors/_typedefs.pxd rename to sklearn/utils/_typedefs.pxd diff --git a/sklearn/neighbors/_typedefs.pyx b/sklearn/utils/_typedefs.pyx similarity index 100% rename from sklearn/neighbors/_typedefs.pyx rename to sklearn/utils/_typedefs.pyx diff --git a/sklearn/utils/setup.py b/sklearn/utils/setup.py index c75cbe2d86495..ed78ecc5db76f 100644 --- a/sklearn/utils/setup.py +++ b/sklearn/utils/setup.py @@ -88,6 +88,13 @@ def configuration(parent_package="", top_path=None): libraries=libraries, ) + config.add_extension( + "_typedefs", + sources=["_typedefs.pyx"], + include_dirs=[numpy.get_include()], + libraries=libraries, + ) + config.add_subpackage("tests") return config