diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 1a70d2e4fbcea..fa90dedb06da7 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -284,7 +284,7 @@ def euclidean_distances( X, Y=None, *, Y_norm_squared=None, squared=False, X_norm_squared=None ): """ - Compute the distance matrix between each pair from a vector array X and Y. + Compute the distance matrix between each pair from a feature array X and Y. For efficiency reasons, the euclidean distance between a pair of row vector x and y is computed as:: @@ -2276,12 +2276,21 @@ def pairwise_distances( ensure_all_finite=None, **kwds, ): - """Compute the distance matrix from a vector array X and optional Y. + """Compute the distance matrix from a feature array X and optional Y. - This method takes either a vector array or a distance matrix, and returns + This function takes one or two feature arrays or a distance matrix, and returns a distance matrix. - If the input is a vector array, the distances are computed. - If the input is a distances matrix, it is returned instead. + + - If `X` is a feature array, of shape (n_samples_X, n_features), and: + + - `Y` is `None` and `metric` is not 'precomputed', the pairwise distances + between `X` and itself are returned. + - `Y` is a feature array of shape (n_samples_Y, n_features), the pairwise + distances between `X` and `Y` are returned. + + - If `X` is a distance matrix, of shape (n_samples_X, n_samples_X), `metric` + should be 'precomputed'. `Y` is thus ignored and `X` is returned as is. + If the input is a collection of non-numeric data (e.g. a list of strings or a boolean array), a custom metric must be passed. @@ -2289,15 +2298,11 @@ def pairwise_distances( preserving compatibility with many other algorithms that take a vector array. - If Y is given (default is None), then the returned matrix is the pairwise - distance between the arrays from both X and Y. 
- Valid values for metric are: - From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', - 'manhattan']. These metrics support sparse matrix - inputs. - ['nan_euclidean'] but it does not yet support sparse matrices. + 'manhattan', 'nan_euclidean']. All metrics support sparse matrix + inputs except 'nan_euclidean'. - From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', @@ -2570,15 +2575,15 @@ def pairwise_kernels( ): """Compute the kernel between arrays X and optional array Y. - This method takes one or two vector arrays or a kernel matrix, and returns + This function takes one or two feature arrays or a kernel matrix, and returns a kernel matrix. - - If `X` is a vector array, of shape (n_samples_X, n_features), and: + - If `X` is a feature array, of shape (n_samples_X, n_features), and: - `Y` is `None` and `metric` is not 'precomputed', the pairwise kernels - between `X` and itself are computed. - - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise - kernels between arrays `X` and `Y` is returned. + between `X` and itself are returned. + - `Y` is a feature array of shape (n_samples_Y, n_features), the pairwise + kernels between `X` and `Y` are returned. - If `X` is a kernel matrix, of shape (n_samples_X, n_samples_X), `metric` should be 'precomputed'. `Y` is thus ignored and `X` is returned as is.