From dc5d80263296293ebd2e7015f4bf0174e7fe85d0 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Tue, 4 Jul 2023 14:57:41 -0400 Subject: [PATCH 1/5] Initial changes --- sklearn/metrics/_dist_metrics.pxd.tp | 8 +- sklearn/metrics/_dist_metrics.pyx.tp | 132 +++++++++--------- .../_datasets_pair.pyx.tp | 16 +-- 3 files changed, 78 insertions(+), 78 deletions(-) diff --git a/sklearn/metrics/_dist_metrics.pxd.tp b/sklearn/metrics/_dist_metrics.pxd.tp index 60b8da3ecfa46..d8d3c99ade824 100644 --- a/sklearn/metrics/_dist_metrics.pxd.tp +++ b/sklearn/metrics/_dist_metrics.pxd.tp @@ -88,9 +88,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -101,9 +101,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index bc54e51a7511a..a70a9bb231095 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -393,9 +393,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -459,9 +459,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef float64_t rdist_csr( 
self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -523,9 +523,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): x2_end = x1_indptr[i2 + 1] D[i1, i2] = D[i2, i1] = self.dist_csr( x1_data, - x1_indices, + &x1_indices[0], x1_data, - x1_indices, + &x1_indices[0], x1_start, x1_end, x2_start, @@ -562,9 +562,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): D[i1, i2] = self.dist_csr( x1_data, - x1_indices, + &x1_indices[0], x2_data, - x2_indices, + &x2_indices[0], x1_start, x1_end, x2_start, @@ -735,9 +735,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): Darr[i1, i2] = self.dist_csr( x1_data=&X_data[0], - x1_indices=X_indices, + x1_indices=&X_indices[0], x2_data=x2_data, - x2_indices=Y_indices, + x2_indices=&Y_indices[0], x1_start=x1_start, x1_end=x1_end, x2_start=0, @@ -801,9 +801,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): Darr[i1, i2] = self.dist_csr( x1_data=x1_data, - x1_indices=X_indices, + x1_indices=&X_indices[0], x2_data=&Y_data[0], - x2_indices=Y_indices, + x2_indices=&Y_indices[0], x1_start=0, x1_end=n_features, x2_start=x2_start, @@ -896,9 +896,9 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -948,9 +948,9 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const 
int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1024,9 +1024,9 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1077,9 +1077,9 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1126,9 +1126,9 @@ cdef class ManhattanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1210,9 +1210,9 @@ cdef class ChebyshevDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1347,9 +1347,9 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + 
const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1427,9 +1427,9 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1539,9 +1539,9 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1593,9 +1593,9 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1644,9 +1644,9 @@ cdef class HammingDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1719,9 +1719,9 @@ cdef class CanberraDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, 
const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1796,9 +1796,9 @@ cdef class BrayCurtisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1880,9 +1880,9 @@ cdef class JaccardDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -1963,9 +1963,9 @@ cdef class MatchingDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2039,9 +2039,9 @@ cdef class DiceDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2120,9 +2120,9 @@ cdef class KulsinskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const 
int32_t x1_end, const int32_t x2_start, @@ -2198,9 +2198,9 @@ cdef class RogersTanimotoDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2275,9 +2275,9 @@ cdef class RussellRaoDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2345,9 +2345,9 @@ cdef class SokalMichenerDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2423,9 +2423,9 @@ cdef class SokalSneathDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, @@ -2525,16 +2525,16 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return tmp * tmp cdef inline float64_t dist_csr( - self, - const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, - const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, - const int32_t x1_start, - const int32_t x1_end, - const int32_t x2_start, - 
const int32_t x2_end, - const intp_t size, + self, + const {{INPUT_DTYPE_t}}* x1_data, + const int32_t* x1_indices, + const {{INPUT_DTYPE_t}}* x2_data, + const int32_t* x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return 2 * asin(sqrt(self.rdist_csr( x1_data, @@ -2551,9 +2551,9 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, const int32_t x1_start, const int32_t x1_end, const int32_t x2_start, diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp index 40a9a45e8b8e1..2fb258741c555 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp @@ -231,9 +231,9 @@ cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.rdist_csr( x1_data=&self.X_data[0], - x1_indices=self.X_indices, + x1_indices=&self.X_indices[0], x2_data=&self.Y_data[0], - x2_indices=self.Y_indices, + x2_indices=&self.Y_indices[0], x1_start=self.X_indptr[i], x1_end=self.X_indptr[i + 1], x2_start=self.Y_indptr[j], @@ -245,9 +245,9 @@ cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.dist_csr( x1_data=&self.X_data[0], - x1_indices=self.X_indices, + x1_indices=&self.X_indices[0], x2_data=&self.Y_data[0], - x2_indices=self.Y_indices, + x2_indices=&self.Y_indices[0], x1_start=self.X_indptr[i], x1_end=self.X_indptr[i + 
1], x2_start=self.Y_indptr[j], @@ -324,11 +324,11 @@ cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.rdist_csr( x1_data=&self.X_data[0], - x1_indices=self.X_indices, + x1_indices=&self.X_indices[0], # Increment the data pointer such that x2_start=0 is aligned with the # j-th row x2_data=&self.Y_data[0] + j * self.n_features, - x2_indices=self.Y_indices, + x2_indices=&self.Y_indices[0], x1_start=self.X_indptr[i], x1_end=self.X_indptr[i + 1], x2_start=0, @@ -341,11 +341,11 @@ cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): return self.distance_metric.dist_csr( x1_data=&self.X_data[0], - x1_indices=self.X_indices, + x1_indices=&self.X_indices[0], # Increment the data pointer such that x2_start=0 is aligned with the # j-th row x2_data=&self.Y_data[0] + j * self.n_features, - x2_indices=self.Y_indices, + x2_indices=&self.Y_indices[0], x1_start=self.X_indptr[i], x1_end=self.X_indptr[i + 1], x2_start=0, From bf001065774e50941f7d0ea2863a4187fd1b4db4 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Fri, 7 Jul 2023 11:41:54 -0400 Subject: [PATCH 2/5] Added changelog entry --- doc/whats_new/v1.4.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 6a5660ee27b2e..c913d453c142f 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -100,3 +100,12 @@ TODO: update at the time of the release. - |Fix| :func:`feature_selection.mutual_info_regression` now correctly computes the result when `X` is of integer dtype. :pr:`26748` by :user:`Yao Xiao `. + + +:mod:`sklearn.metrics` +................... + +- |Performance| Computing pairwise distances for (CSR x CSR) and (CSR x Dense) + datasets is now 1.5x faster by improving the argument passing strategy used + in the computation routines in :class:`metrics.DistanceMetric`. 
+ :pr:`26765` by :user:`Meekail Zain ` From 6672e7210d36629e9285fc7b895e3eaece1116ca Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Fri, 7 Jul 2023 13:26:48 -0400 Subject: [PATCH 3/5] Updated changelog --- doc/whats_new/v1.4.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 667bc52f317d2..5a16534b436ce 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -90,11 +90,10 @@ Changelog :user:`Patrick O'Reilly `. :mod:`sklearn.metrics` -................... +...................... -- |Performance| Computing pairwise distances for (CSR x CSR) and (CSR x Dense) - datasets is now 1.5x faster by improving the argument passing strategy used - in the computation routines in :class:`metrics.DistanceMetric`. +- |Performance| Computing pairwise distances via :class:`metrics.DistanceMetric` + for CSR × CSR, Dense × CSR, and CSR × Dense datasets is now 1.5x faster. :pr:`26765` by :user:`Meekail Zain ` Code and Documentation Contributors From 92f7c126d99a62e54f441f0087ce7382065e1056 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Thu, 20 Jul 2023 12:57:05 -0400 Subject: [PATCH 4/5] Enforce contiguity wherever possible --- sklearn/metrics/_dist_metrics.pxd.tp | 12 +++---- sklearn/metrics/_dist_metrics.pyx.tp | 36 +++++++++---------- .../_datasets_pair.pxd.tp | 14 ++++---- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/sklearn/metrics/_dist_metrics.pxd.tp b/sklearn/metrics/_dist_metrics.pxd.tp index d8d3c99ade824..fb3cb69339321 100644 --- a/sklearn/metrics/_dist_metrics.pxd.tp +++ b/sklearn/metrics/_dist_metrics.pxd.tp @@ -127,8 +127,8 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef int pdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, - const int32_t[:] x1_indptr, + const int32_t[::1] x1_indices, + const int32_t[::1] x1_indptr, const intp_t size, float64_t[:, ::1] D, ) except -1 nogil @@ -136,11 +136,11 @@ cdef class 
DistanceMetric{{name_suffix}}(DistanceMetric): cdef int cdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, - const int32_t[:] x1_indptr, + const int32_t[::1] x1_indices, + const int32_t[::1] x1_indptr, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, - const int32_t[:] x2_indptr, + const int32_t[::1] x2_indices, + const int32_t[::1] x2_indptr, const intp_t size, float64_t[:, ::1] D, ) except -1 nogil diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index a70a9bb231095..172a819450ef8 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -423,9 +423,9 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, + const int32_t* x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, + const int32_t* x2_indices, ) except -1 nogil: Where callers would use slicing on the original CSR data and indices @@ -500,8 +500,8 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef int pdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, - const int32_t[:] x1_indptr, + const int32_t[::1] x1_indices, + const int32_t[::1] x1_indptr, const intp_t size, float64_t[:, ::1] D, ) except -1 nogil: @@ -537,11 +537,11 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef int cdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const int32_t[:] x1_indices, - const int32_t[:] x1_indptr, + const int32_t[::1] x1_indices, + const int32_t[::1] x1_indptr, const {{INPUT_DTYPE_t}}* x2_data, - const int32_t[:] x2_indices, - const int32_t[:] x2_indptr, + const int32_t[::1] x2_indices, + const int32_t[::1] x2_indptr, const intp_t size, float64_t[:, ::1] D, ) except -1 nogil: @@ -642,13 +642,13 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef: intp_t n_X, n_features const {{INPUT_DTYPE_t}}[:] 
X_data - const int32_t[:] X_indices - const int32_t[:] X_indptr + const int32_t[::1] X_indices + const int32_t[::1] X_indptr intp_t n_Y const {{INPUT_DTYPE_t}}[:] Y_data - const int32_t[:] Y_indices - const int32_t[:] Y_indptr + const int32_t[::1] Y_indices + const int32_t[::1] Y_indptr float64_t[:, ::1] Darr @@ -693,10 +693,10 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): const {{INPUT_DTYPE_t}}[:] X_data = np.asarray( X.data, dtype={{INPUT_DTYPE}}, ) - const int32_t[:] X_indices = np.asarray( + const int32_t[::1] X_indices = np.asarray( X.indices, dtype=np.int32, ) - const int32_t[:] X_indptr = np.asarray( + const int32_t[::1] X_indptr = np.asarray( X.indptr, dtype=np.int32, ) @@ -704,7 +704,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): Y, dtype={{INPUT_DTYPE}}, order="C", ) intp_t n_Y = Y_data.shape[0] - const int32_t[:] Y_indices = ( + const int32_t[::1] Y_indices = ( np.arange(n_features, dtype=np.int32) ) @@ -758,7 +758,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): const {{INPUT_DTYPE_t}}[:, ::1] X_data = np.asarray( X, dtype={{INPUT_DTYPE}}, order="C", ) - const int32_t[:] X_indices = np.arange( + const int32_t[::1] X_indices = np.arange( n_features, dtype=np.int32, ) @@ -766,10 +766,10 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): const {{INPUT_DTYPE_t}}[:] Y_data = np.asarray( Y.data, dtype={{INPUT_DTYPE}}, ) - const int32_t[:] Y_indices = np.asarray( + const int32_t[::1] Y_indices = np.asarray( Y.indices, dtype=np.int32, ) - const int32_t[:] Y_indptr = np.asarray( + const int32_t[::1] Y_indptr = np.asarray( Y.indptr, dtype=np.int32, ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp index fc56a59cab16f..1e57b3291a8f4 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp @@ -38,22 +38,22 @@ 
cdef class DenseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef: const {{INPUT_DTYPE_t}}[:] X_data - const int32_t[:] X_indices - const int32_t[:] X_indptr + const int32_t[::1] X_indices + const int32_t[::1] X_indptr const {{INPUT_DTYPE_t}}[:] Y_data - const int32_t[:] Y_indices - const int32_t[:] Y_indptr + const int32_t[::1] Y_indices + const int32_t[::1] Y_indptr cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef: const {{INPUT_DTYPE_t}}[:] X_data - const int32_t[:] X_indices - const int32_t[:] X_indptr + const int32_t[::1] X_indices + const int32_t[::1] X_indptr const {{INPUT_DTYPE_t}}[:] Y_data - const int32_t[:] Y_indices + const int32_t[::1] Y_indices intp_t n_Y From 183b44b83ed3a0da2bc5543e2025a9d436737e48 Mon Sep 17 00:00:00 2001 From: Meekail Zain Date: Fri, 21 Jul 2023 16:25:59 -0400 Subject: [PATCH 5/5] Enforced contiguous arrays --- sklearn/metrics/_dist_metrics.pyx.tp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index 172a819450ef8..332b86fb26b85 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -641,12 +641,12 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): def _pairwise_sparse_sparse(self, X: csr_matrix , Y: csr_matrix): cdef: intp_t n_X, n_features - const {{INPUT_DTYPE_t}}[:] X_data + const {{INPUT_DTYPE_t}}[::1] X_data const int32_t[::1] X_indices const int32_t[::1] X_indptr intp_t n_Y - const {{INPUT_DTYPE_t}}[:] Y_data + const {{INPUT_DTYPE_t}}[::1] Y_data const int32_t[::1] Y_indices const int32_t[::1] Y_indptr @@ -690,7 +690,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): cdef: intp_t n_X = X.shape[0] intp_t n_features = X.shape[1] - const {{INPUT_DTYPE_t}}[:] X_data = np.asarray( 
X.data, dtype={{INPUT_DTYPE}}, ) const int32_t[::1] X_indices = np.asarray( @@ -763,7 +763,7 @@ cdef class DistanceMetric{{name_suffix}}(DistanceMetric): ) intp_t n_Y = Y.shape[0] - const {{INPUT_DTYPE_t}}[:] Y_data = np.asarray( + const {{INPUT_DTYPE_t}}[::1] Y_data = np.asarray( Y.data, dtype={{INPUT_DTYPE}}, ) const int32_t[::1] Y_indices = np.asarray(