From 9a65762e0521acf90e0aa4380a3abafc012d1ec7 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 11 Apr 2025 10:04:42 +1000 Subject: [PATCH 1/3] update pw dist --- sklearn/metrics/pairwise.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 3fe3db110238e..fae782bb7f380 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -2285,10 +2285,19 @@ def pairwise_distances( ): """Compute the distance matrix from a vector array X and optional Y. - This method takes either a vector array or a distance matrix, and returns + This method takes one or two vector arrays or a distance matrix, and returns a distance matrix. - If the input is a vector array, the distances are computed. - If the input is a distances matrix, it is returned instead. + + - If `X` is a vector array, of shape (n_samples_X, n_features), and: + + - `Y` is `None` and `metric` is not 'precomputed', the pairwise distances + between `X` and itself are computed. + - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise + distances between `X` and `Y` are returned. + + - If `X` is a distance matrix, of shape (n_samples_X, n_samples_X), `metric` + should be 'precomputed'. `Y` is thus ignored and `X` is returned as is. + If the input is a collection of non-numeric data (e.g. a list of strings or a boolean array), a custom metric must be passed. @@ -2296,15 +2305,11 @@ def pairwise_distances( preserving compatibility with many other algorithms that take a vector array. - If Y is given (default is None), then the returned matrix is the pairwise - distance between the arrays from both X and Y. - Valid values for metric are: - From scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', - 'manhattan']. These metrics support sparse matrix - inputs. - ['nan_euclidean'] but it does not yet support sparse matrices. + 'manhattan', 'nan_euclidean']. 
All metrics support sparse matrix + inputs except 'nan_euclidean'. - From scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', @@ -2581,7 +2586,7 @@ def pairwise_kernels( - `Y` is `None` and `metric` is not 'precomputed', the pairwise kernels between `X` and itself are computed. - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise - kernels between arrays `X` and `Y` is returned. + kernels between `X` and `Y` are returned. - If `X` is a kernel matrix, of shape (n_samples_X, n_samples_X), `metric` should be 'precomputed'. `Y` is thus ignored and `X` is returned as is. From e3f7a5abf0b12a8361fed461474d99585c15fad1 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Mon, 14 Apr 2025 12:46:29 +1000 Subject: [PATCH 2/3] review --- sklearn/metrics/pairwise.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index fae782bb7f380..c17bf52afb3c0 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -2285,13 +2285,13 @@ def pairwise_distances( ): """Compute the distance matrix from a vector array X and optional Y. - This method takes one or two vector arrays or a distance matrix, and returns + This function takes one or two vector arrays or a distance matrix, and returns a distance matrix. - If `X` is a vector array, of shape (n_samples_X, n_features), and: - `Y` is `None` and `metric` is not 'precomputed', the pairwise distances - between `X` and itself are computed. + between `X` and itself are returned. - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise distances between `X` and `Y` are returned. @@ -2578,13 +2578,13 @@ def pairwise_kernels( ): """Compute the kernel between arrays X and optional array Y. 
- This method takes one or two vector arrays or a kernel matrix, and returns + This function takes one or two vector arrays or a kernel matrix, and returns a kernel matrix. - If `X` is a vector array, of shape (n_samples_X, n_features), and: - `Y` is `None` and `metric` is not 'precomputed', the pairwise kernels - between `X` and itself are computed. + between `X` and itself are returned. - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise kernels between `X` and `Y` are returned. From 301624ce7b37fd93bab25e5680c05f8254e16cc2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 16 Apr 2025 13:51:36 +1000 Subject: [PATCH 3/3] use feature array --- sklearn/metrics/pairwise.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index f606b62ddcc18..d68bd6b409cd1 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -284,7 +284,7 @@ def euclidean_distances( X, Y=None, *, Y_norm_squared=None, squared=False, X_norm_squared=None ): """ - Compute the distance matrix between each pair from a vector array X and Y. + Compute the distance matrix between each pair from a feature array X and Y. For efficiency reasons, the euclidean distance between a pair of row vector x and y is computed as:: @@ -2275,16 +2275,16 @@ def pairwise_distances( ensure_all_finite=None, **kwds, ): - """Compute the distance matrix from a vector array X and optional Y. + """Compute the distance matrix from a feature array X and optional Y. - This function takes one or two vector arrays or a distance matrix, and returns + This function takes one or two feature arrays or a distance matrix, and returns a distance matrix. - - If `X` is a vector array, of shape (n_samples_X, n_features), and: + - If `X` is a feature array, of shape (n_samples_X, n_features), and: - `Y` is `None` and `metric` is not 'precomputed', the pairwise distances between `X` and itself are returned. 
- - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise + - `Y` is a feature array of shape (n_samples_Y, n_features), the pairwise distances between `X` and `Y` are returned. - If `X` is a distance matrix, of shape (n_samples_X, n_samples_X), `metric` @@ -2570,14 +2570,14 @@ def pairwise_kernels( ): """Compute the kernel between arrays X and optional array Y. - This function takes one or two vector arrays or a kernel matrix, and returns + This function takes one or two feature arrays or a kernel matrix, and returns a kernel matrix. - - If `X` is a vector array, of shape (n_samples_X, n_features), and: + - If `X` is a feature array, of shape (n_samples_X, n_features), and: - `Y` is `None` and `metric` is not 'precomputed', the pairwise kernels between `X` and itself are returned. - - `Y` is a vector array of shape (n_samples_Y, n_features), the pairwise + - `Y` is a feature array of shape (n_samples_Y, n_features), the pairwise kernels between `X` and `Y` are returned. - If `X` is a kernel matrix, of shape (n_samples_X, n_samples_X), `metric`