scikit-learn · jjerphan · Nov 18, 2022 · Sep 30, 2022 · Sep 30, 2022 · Oct 11, 2022
diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
@@ -98,7 +98,8 @@ Changes impacting all modules
   - :func:`sklearn.manifold.trustworthiness`
 
   :pr:`23604` and :pr:`23585` by :user:`Julien Jerphanion <jjerphan>`,
-  :user:`Olivier Grisel <ogrisel>`, and `Thomas Fan`_.
+  :user:`Olivier Grisel <ogrisel>`, and `Thomas Fan`_;
+  :pr:`24556` by :user:`Vincent Maladière <Vincent-Maladiere>`.
 
 - |Fix| Systematically check the sha256 digest of dataset tarballs used in code
   examples in the documentation.

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp
@@ -63,9 +63,10 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
         """
         if (
             metric in ("euclidean", "sqeuclidean")
-            and not (issparse(X) or issparse(Y))
+            and not (issparse(X) ^ issparse(Y))  # "^" is the XOR operator
         ):
-            # Specialized implementation of ArgKmin for the Euclidean distance.
+            # Specialized implementation of ArgKmin for the Euclidean distance
+            # for the dense-dense and sparse-sparse cases.
             # This implementation computes the distances by chunk using
             # a decomposition of the Squared Euclidean distance.
             # This specialisation has an improved arithmetic intensity for both
@@ -492,7 +493,6 @@ cdef class EuclideanArgKmin{{name_suffix}}(ArgKmin{{name_suffix}}):
             DTYPE_t * heaps_r_distances = self.heaps_r_distances_chunks[thread_num]
             ITYPE_t * heaps_indices = self.heaps_indices_chunks[thread_num]
 
-
         # Pushing the distance and their associated indices on heaps
         # which keep tracks of the argkmin.
         for i in range(n_X):

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pxd.tp
@@ -2,7 +2,7 @@ cimport numpy as cnp
 
 from cython cimport final
 
-from ...utils._typedefs cimport ITYPE_t, DTYPE_t
+from ...utils._typedefs cimport ITYPE_t, DTYPE_t, SPARSE_INDEX_TYPE_t
 
 cnp.import_array()
 
@@ -12,7 +12,7 @@ from ._datasets_pair cimport DatasetsPair{{name_suffix}}
 
 
 cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}(
-    const {{INPUT_DTYPE_t}}[:, ::1] X,
+    X,
     ITYPE_t num_threads,
 )
 

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_base.pyx.tp
@@ -26,11 +26,12 @@ from ...utils._typedefs cimport ITYPE_t, DTYPE_t
 
 import numpy as np
 
+from scipy.sparse import issparse
 from numbers import Integral
 from sklearn import get_config
 from sklearn.utils import check_scalar
 from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils._typedefs import DTYPE
+from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE
 
 cnp.import_array()
 
@@ -102,16 +103,40 @@ cdef DTYPE_t[::1] _sqeuclidean_row_norms32_dense(
     return squared_row_norms
 
 
+cdef DTYPE_t[::1] _sqeuclidean_row_norms64_sparse(
+    const DTYPE_t[:] X_data,  # stored values (presumably CSR `data`; confirm with callers)
+    const SPARSE_INDEX_TYPE_t[:] X_indptr,  # row i owns entries [X_indptr[i], X_indptr[i+1])
+    ITYPE_t num_threads,  # forwarded to prange
+):
+    cdef:
+        ITYPE_t n = X_indptr.shape[0] - 1  # number of rows described by X_indptr
+        SPARSE_INDEX_TYPE_t X_i_ptr, idx = 0
+        DTYPE_t[::1] squared_row_norms = np.zeros(n, dtype=DTYPE)  # one accumulator per row
+
+    for idx in prange(n, schedule='static', nogil=True, num_threads=num_threads):
+        for X_i_ptr in range(X_indptr[idx], X_indptr[idx+1]):
+            squared_row_norms[idx] += X_data[X_i_ptr] * X_data[X_i_ptr]  # sum of squares of row idx
+
+    return squared_row_norms
+
+
 {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
 
 from ._datasets_pair cimport DatasetsPair{{name_suffix}}
 
 
 cpdef DTYPE_t[::1] _sqeuclidean_row_norms{{name_suffix}}(
-    const {{INPUT_DTYPE_t}}[:, ::1] X,
+    X,
     ITYPE_t num_threads,
 ):
-    return _sqeuclidean_row_norms{{name_suffix}}_dense(X, num_threads)
+    if issparse(X):
+        # TODO: remove this conversion (an upcast of X.data in the float32 case)
+        # by moving squared row norms computations in MiddleTermComputer.
+        X_data = np.asarray(X.data, dtype=DTYPE)
+        X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE)
+        return _sqeuclidean_row_norms64_sparse(X_data, X_indptr, num_threads)
+    else:
+        return _sqeuclidean_row_norms{{name_suffix}}_dense(X, num_threads)
 
 
 cdef class BaseDistancesReduction{{name_suffix}}:
@@ -131,7 +156,7 @@ cdef class BaseDistancesReduction{{name_suffix}}:
         strategy=None,
      ):
         cdef:
-            ITYPE_t n_samples_chunk, X_n_full_chunks, Y_n_full_chunks
+            ITYPE_t X_n_full_chunks, Y_n_full_chunks
 
         if chunk_size is None:
             chunk_size = get_config().get("pairwise_dist_chunk_size", 256)

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py
@@ -8,10 +8,7 @@
 
 from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING
 
-from ._base import (
-    _sqeuclidean_row_norms64,
-    _sqeuclidean_row_norms32,
-)
+from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64
 from ._argkmin import (
     ArgKmin64,
     ArgKmin32,
@@ -133,8 +130,10 @@ def is_valid_sparse_matrix(X):
         # See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669  # noqa
         # TODO: implement specialisation for (sq)euclidean on fused sparse-dense
         # using sparse-dense routines for matrix-vector multiplications.
+        # Currently, only dense-dense and sparse-sparse are optimized for
+        # the Euclidean case.
         fused_sparse_dense_euclidean_case_guard = not (
-            (is_valid_sparse_matrix(X) or is_valid_sparse_matrix(Y))
+            (is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y))  # "^" is XOR
             and isinstance(metric, str)
             and "euclidean" in metric
         )

diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp
@@ -17,7 +17,22 @@ cimport numpy as cnp
 
 from libcpp.vector cimport vector
 
-from ...utils._typedefs cimport DTYPE_t, ITYPE_t
+from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
+
+
+cdef void _middle_term_sparse_sparse_64(
+    const DTYPE_t[:] X_data,
+    const SPARSE_INDEX_TYPE_t[:] X_indices,
+    const SPARSE_INDEX_TYPE_t[:] X_indptr,
+    ITYPE_t X_start,
+    ITYPE_t X_end,
+    const DTYPE_t[:] Y_data,
+    const SPARSE_INDEX_TYPE_t[:] Y_indices,
+    const SPARSE_INDEX_TYPE_t[:] Y_indptr,
+    ITYPE_t Y_start,
+    ITYPE_t Y_end,
+    DTYPE_t * D,
+) nogil
 
 
 {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
@@ -133,4 +148,42 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_
     ) nogil
 
 
+cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
+    cdef:
+        const DTYPE_t[:] X_data
+        const SPARSE_INDEX_TYPE_t[:] X_indices
+        const SPARSE_INDEX_TYPE_t[:] X_indptr
+
+        const DTYPE_t[:] Y_data
+        const SPARSE_INDEX_TYPE_t[:] Y_indices
+        const SPARSE_INDEX_TYPE_t[:] Y_indptr
+
+    cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
+        self,
+        ITYPE_t X_start,
+        ITYPE_t X_end,
+        ITYPE_t Y_start,
+        ITYPE_t Y_end,
+        ITYPE_t thread_num
+    ) nogil
+
+    cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
+        self,
+        ITYPE_t X_start,
+        ITYPE_t X_end,
+        ITYPE_t Y_start,
+        ITYPE_t Y_end,
+        ITYPE_t thread_num
+    ) nogil
+
+    cdef DTYPE_t * _compute_dist_middle_terms(
+        self,
+        ITYPE_t X_start,
+        ITYPE_t X_end,
+        ITYPE_t Y_start,
+        ITYPE_t Y_end,
+        ITYPE_t thread_num,
+    ) nogil
+
+
 {{endfor}}