diff --git a/setup.py b/setup.py index f5522600f623f..e36848a316b69 100755 --- a/setup.py +++ b/setup.py @@ -265,7 +265,6 @@ def check_package_status(package, min_version): { "sources": ["_middle_term_computer.pyx.tp", "_middle_term_computer.pxd.tp"], "language": "c++", - "include_np": True, "extra_compile_args": ["-std=c++11"], }, { @@ -397,9 +396,9 @@ def check_package_status(package, min_version): }, {"sources": ["_random.pyx"], "include_np": True}, {"sources": ["_logistic_sigmoid.pyx"], "include_np": True}, - {"sources": ["_typedefs.pyx"], "include_np": True}, - {"sources": ["_heap.pyx"], "include_np": True}, - {"sources": ["_sorting.pyx"], "include_np": True}, + {"sources": ["_typedefs.pyx"]}, + {"sources": ["_heap.pyx"]}, + {"sources": ["_sorting.pyx"]}, {"sources": ["_vector_sentinel.pyx"], "language": "c++", "include_np": True}, {"sources": ["_isfinite.pyx"]}, ], diff --git a/sklearn/cluster/_hierarchical_fast.pyx b/sklearn/cluster/_hierarchical_fast.pyx index 99f0b3c0f0235..4ba895ddcf352 100644 --- a/sklearn/cluster/_hierarchical_fast.pyx +++ b/sklearn/cluster/_hierarchical_fast.pyx @@ -5,7 +5,7 @@ cimport cython from ..metrics._dist_metrics cimport DistanceMetric from ..utils._fast_dict cimport IntFloatDict -from ..utils._typedefs cimport float64_t, intp_t, bool_t +from ..utils._typedefs cimport float64_t, intp_t, uint8_t # C++ from cython.operator cimport dereference as deref, preincrement as inc @@ -119,7 +119,7 @@ def _get_parents( nodes, heads, const intp_t[:] parents, - bool_t[::1] not_visited + uint8_t[::1] not_visited ): """Returns the heads of the given nodes, as defined by parents. @@ -465,7 +465,7 @@ def mst_linkage_core( """ cdef: intp_t n_samples = raw_data.shape[0] - bool_t[:] in_tree = np.zeros(n_samples, dtype=bool) + uint8_t[:] in_tree = np.zeros(n_samples, dtype=bool) float64_t[:, ::1] result = np.zeros((n_samples - 1, 3)) intp_t current_node = 0 diff --git a/sklearn/metrics/_dist_metrics.pxd.tp b/sklearn/metrics/_dist_metrics.pxd.tp index cb1aec99b2e9a..511e0941a7098 100644 --- a/sklearn/metrics/_dist_metrics.pxd.tp +++ b/sklearn/metrics/_dist_metrics.pxd.tp @@ -9,26 +9,21 @@ implementation_specific_values = [ # for the float64 case as to still be able to expose the original # float64 implementation under the same API, namely `DistanceMetric`. # - # On the other hand, '32' bit is used for `name_suffix` for the float32 + # On the other hand, '32' is used for `name_suffix` for the float32 # case to remove ambiguity and use `DistanceMetric32`, which is not # publicly exposed. # # The metric mapping is adapted accordingly to route to the correct # implementations. # - # We also use 64bit types as defined in `sklearn.utils._typedefs` - # to maintain backward compatibility at the symbol level for extra - # safety. - # - ('', 'DTYPE_t', 'DTYPE'), - ('32', 'cnp.float32_t', 'np.float32') + ('', 'float64_t', 'np.float64'), + ('32', 'float32_t', 'np.float32') ] }} -cimport numpy as cnp from libc.math cimport sqrt, exp -from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} @@ -37,37 +32,37 @@ from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t # # We use these for the default (euclidean) case so that they can be # inlined. This leads to faster computation for the most common case -cdef inline DTYPE_t euclidean_dist{{name_suffix}}( +cdef inline float64_t euclidean_dist{{name_suffix}}( const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t tmp, d=0 - cdef cnp.intp_t j + cdef float64_t tmp, d=0 + cdef intp_t j for j in range(size): - tmp = (x1[j] - x2[j]) + tmp = (x1[j] - x2[j]) d += tmp * tmp return sqrt(d) -cdef inline DTYPE_t euclidean_rdist{{name_suffix}}( +cdef inline float64_t euclidean_rdist{{name_suffix}}( const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t tmp, d=0 - cdef cnp.intp_t j + cdef float64_t tmp, d=0 + cdef intp_t j for j in range(size): - tmp = (x1[j] - x2[j]) + tmp = (x1[j] - x2[j]) d += tmp * tmp return d -cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil: +cdef inline float64_t euclidean_dist_to_rdist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil: return dist * dist -cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil: +cdef inline float64_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil: return sqrt(dist) @@ -78,89 +73,89 @@ cdef class DistanceMetric{{name_suffix}}: # we must define them here so that cython's limited polymorphism will work. # Because we don't expect to instantiate a lot of these objects, the # extra memory overhead of this setup should not be an issue. - cdef DTYPE_t p - cdef const DTYPE_t[::1] vec - cdef const DTYPE_t[:, ::1] mat - cdef ITYPE_t size + cdef float64_t p + cdef const float64_t[::1] vec + cdef const float64_t[:, ::1] mat + cdef intp_t size cdef object func cdef object kwargs - cdef DTYPE_t dist( + cdef float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil - cdef DTYPE_t rdist( + cdef float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil - cdef DTYPE_t dist_csr( + cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil - cdef DTYPE_t rdist_csr( + cdef float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil cdef int pdist( self, const {{INPUT_DTYPE_t}}[:, ::1] X, - DTYPE_t[:, ::1] D, + float64_t[:, ::1] D, ) except -1 cdef int cdist( self, const {{INPUT_DTYPE_t}}[:, ::1] X, const {{INPUT_DTYPE_t}}[:, ::1] Y, - DTYPE_t[:, ::1] D, + float64_t[:, ::1] D, ) except -1 cdef int pdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, - const SPARSE_INDEX_TYPE_t[:] x1_indptr, - const ITYPE_t size, - DTYPE_t[:, ::1] D, + const int32_t[:] x1_indices, + const int32_t[:] x1_indptr, + const intp_t size, + float64_t[:, ::1] D, ) except -1 nogil cdef int cdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, - const SPARSE_INDEX_TYPE_t[:] x1_indptr, + const int32_t[:] x1_indices, + const int32_t[:] x1_indptr, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t[:] x2_indptr, - const ITYPE_t size, - DTYPE_t[:, ::1] D, + const int32_t[:] x2_indices, + const int32_t[:] x2_indptr, + const intp_t size, + float64_t[:, ::1] D, ) except -1 nogil - cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil + cdef float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil - cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil + cdef float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil {{endfor}} diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index 4d2ab3251b56e..ed09552ed5914 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -5,10 +5,9 @@ implementation_specific_values = [ # # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE # - # # On the first hand, an empty string is used for `name_suffix` - # for the 64bit case as to still be able to expose the original - # 64bit implementation under the same API, namely `DistanceMetric`. + # for the float64 case as to still be able to expose the original + # float64 implementation under the same API, namely `DistanceMetric`. # # On the other hand, '32' bit is used for `name_suffix` for the float32 # case to remove ambiguity and use `DistanceMetric32`, which is not @@ -17,12 +16,8 @@ implementation_specific_values = [ # The metric mapping is adapted accordingly to route to the correct # implementations. # - # We also use 64bit types as defined in `sklearn.utils._typedefs` - # to maintain backward compatibility at the symbol level for extra - # safety. - # - ('', 'DTYPE_t', 'DTYPE'), - ('32', 'cnp.float32_t', 'np.float32') + ('', 'float64_t', 'np.float64'), + ('32', 'float32_t', 'np.float32') ] }} @@ -38,8 +33,7 @@ cnp.import_array() # required in order to use C-API from libc.math cimport fabs, sqrt, exp, pow, cos, sin, asin from scipy.sparse import csr_matrix, issparse -from ..utils._typedefs cimport DTYPE_t, ITYPE_t, DTYPECODE -from ..utils._typedefs import DTYPE, ITYPE +from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t from ..utils import check_array from ..utils.fixes import parse_version, sp_base_version @@ -80,7 +74,6 @@ def get_valid_metric_ids(L): return [key for (key, val) in METRIC_MAPPING.items() if (val.__name__ in L) or (val in L)] -from ..utils._typedefs import SPARSE_INDEX_TYPE {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} @@ -115,14 +108,15 @@ METRIC_MAPPING{{name_suffix}} = { 'pyfunc': PyFuncDistance{{name_suffix}}, } -cdef inline object _buffer_to_ndarray{{name_suffix}}(const {{INPUT_DTYPE_t}}* x, cnp.npy_intp n): +cdef inline object _buffer_to_ndarray{{name_suffix}}(const {{INPUT_DTYPE_t}}* x, intp_t n): # Wrap a memory buffer with an ndarray. Warning: this is not robust. # In particular, if x is deallocated before the returned array goes # out of scope, this could cause memory errors. Since there is not # a possibility of this for our use-case, this should be safe. # Note: this Segfaults unless np.import_array() is called above - return cnp.PyArray_SimpleNewFromData(1, &n, DTYPECODE, x) + # TODO: remove the explicit cast to cnp.intp_t* when cython min version >= 3.0 + return cnp.PyArray_SimpleNewFromData(1, &n, cnp.NPY_FLOAT64, x) cdef {{INPUT_DTYPE_t}} INF{{name_suffix}} = np.inf @@ -245,8 +239,8 @@ cdef class DistanceMetric{{name_suffix}}: """ def __cinit__(self): self.p = 2 - self.vec = np.zeros(1, dtype=DTYPE, order='C') - self.mat = np.zeros((1, 1), dtype=DTYPE, order='C') + self.vec = np.zeros(1, dtype=np.float64, order='C') + self.mat = np.zeros((1, 1), dtype=np.float64, order='C') self.size = 1 def __reduce__(self): @@ -330,11 +324,11 @@ cdef class DistanceMetric{{name_suffix}}: """ return - cdef DTYPE_t dist( + cdef float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: """Compute the distance between vectors x1 and x2 @@ -342,11 +336,11 @@ cdef class DistanceMetric{{name_suffix}}: """ return -999 - cdef DTYPE_t rdist( + cdef float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: """Compute the rank-preserving surrogate distance between vectors x1 and x2. @@ -362,10 +356,10 @@ cdef class DistanceMetric{{name_suffix}}: cdef int pdist( self, const {{INPUT_DTYPE_t}}[:, ::1] X, - DTYPE_t[:, ::1] D, + float64_t[:, ::1] D, ) except -1: """Compute the pairwise distances between points in X""" - cdef ITYPE_t i1, i2 + cdef intp_t i1, i2 for i1 in range(X.shape[0]): for i2 in range(i1, X.shape[0]): D[i1, i2] = self.dist(&X[i1, 0], &X[i2, 0], X.shape[1]) @@ -377,10 +371,10 @@ cdef class DistanceMetric{{name_suffix}}: self, const {{INPUT_DTYPE_t}}[:, ::1] X, const {{INPUT_DTYPE_t}}[:, ::1] Y, - DTYPE_t[:, ::1] D, + float64_t[:, ::1] D, ) except -1: """Compute the cross-pairwise distances between arrays X and Y""" - cdef ITYPE_t i1, i2 + cdef intp_t i1, i2 if X.shape[1] != Y.shape[1]: raise ValueError('X and Y must have the same second dimension') for i1 in range(X.shape[0]): @@ -388,17 +382,17 @@ cdef class DistanceMetric{{name_suffix}}: D[i1, i2] = self.dist(&X[i1, 0], &Y[i2, 0], X.shape[1]) return 0 - cdef DTYPE_t dist_csr( + cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: """Compute the distance between vectors x1 and x2 represented under the CSR format. @@ -418,12 +412,12 @@ cdef class DistanceMetric{{name_suffix}}: 2. An alternative signature would be: - cdef DTYPE_t dist_csr( + cdef float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, + const int32_t[:] x2_indices, ) except -1 nogil: Where callers would use slicing on the original CSR data and indices @@ -454,17 +448,17 @@ cdef class DistanceMetric{{name_suffix}}: """ return -999 - cdef DTYPE_t rdist_csr( + cdef float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: """Distance between rows of CSR matrices x1 and x2. @@ -498,10 +492,10 @@ cdef class DistanceMetric{{name_suffix}}: cdef int pdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, - const SPARSE_INDEX_TYPE_t[:] x1_indptr, - const ITYPE_t size, - DTYPE_t[:, ::1] D, + const int32_t[:] x1_indices, + const int32_t[:] x1_indptr, + const intp_t size, + float64_t[:, ::1] D, ) except -1 nogil: """Pairwise distances between rows in CSR matrix X. @@ -509,9 +503,9 @@ cdef class DistanceMetric{{name_suffix}}: because it leverages the symmetry of the problem. """ cdef: - ITYPE_t i1, i2 - ITYPE_t n_x1 = x1_indptr.shape[0] - 1 - ITYPE_t x1_start, x1_end, x2_start, x2_end + intp_t i1, i2 + intp_t n_x1 = x1_indptr.shape[0] - 1 + intp_t x1_start, x1_end, x2_start, x2_end for i1 in range(n_x1): x1_start = x1_indptr[i1] @@ -535,21 +529,21 @@ cdef class DistanceMetric{{name_suffix}}: cdef int cdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, - const SPARSE_INDEX_TYPE_t[:] x1_indptr, + const int32_t[:] x1_indices, + const int32_t[:] x1_indptr, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t[:] x2_indptr, - const ITYPE_t size, - DTYPE_t[:, ::1] D, + const int32_t[:] x2_indices, + const int32_t[:] x2_indptr, + const intp_t size, + float64_t[:, ::1] D, ) except -1 nogil: """Compute the cross-pairwise distances between arrays X and Y represented in the CSR format.""" cdef: - ITYPE_t i1, i2 - ITYPE_t n_x1 = x1_indptr.shape[0] - 1 - ITYPE_t n_x2 = x2_indptr.shape[0] - 1 - ITYPE_t x1_start, x1_end, x2_start, x2_end + intp_t i1, i2 + intp_t n_x1 = x1_indptr.shape[0] - 1 + intp_t n_x2 = x2_indptr.shape[0] - 1 + intp_t x1_start, x1_end, x2_start, x2_end for i1 in range(n_x1): x1_start = x1_indptr[i1] @@ -571,11 +565,11 @@ cdef class DistanceMetric{{name_suffix}}: ) return 0 - cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: """Convert the rank-preserving surrogate distance to the distance""" return rdist - cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: """Convert the distance to the rank-preserving surrogate distance""" return dist @@ -622,41 +616,41 @@ cdef class DistanceMetric{{name_suffix}}: def _pairwise_dense_dense(self, X, Y): cdef const {{INPUT_DTYPE_t}}[:, ::1] Xarr cdef const {{INPUT_DTYPE_t}}[:, ::1] Yarr - cdef DTYPE_t[:, ::1] Darr + cdef float64_t[:, ::1] Darr Xarr = np.asarray(X, dtype={{INPUT_DTYPE}}, order='C') self._validate_data(Xarr) if X is Y: - Darr = np.empty((Xarr.shape[0], Xarr.shape[0]), dtype=DTYPE, order='C') + Darr = np.empty((Xarr.shape[0], Xarr.shape[0]), dtype=np.float64, order='C') self.pdist(Xarr, Darr) else: Yarr = np.asarray(Y, dtype={{INPUT_DTYPE}}, order='C') self._validate_data(Yarr) - Darr = np.empty((Xarr.shape[0], Yarr.shape[0]), dtype=DTYPE, order='C') + Darr = np.empty((Xarr.shape[0], Yarr.shape[0]), dtype=np.float64, order='C') self.cdist(Xarr, Yarr, Darr) return np.asarray(Darr) def _pairwise_sparse_sparse(self, X: csr_matrix , Y: csr_matrix): cdef: - ITYPE_t n_X, n_features + intp_t n_X, n_features const {{INPUT_DTYPE_t}}[:] X_data - const SPARSE_INDEX_TYPE_t[:] X_indices - const SPARSE_INDEX_TYPE_t[:] X_indptr + const int32_t[:] X_indices + const int32_t[:] X_indptr - ITYPE_t n_Y + intp_t n_Y const {{INPUT_DTYPE_t}}[:] Y_data - const SPARSE_INDEX_TYPE_t[:] Y_indices - const SPARSE_INDEX_TYPE_t[:] Y_indptr + const int32_t[:] Y_indices + const int32_t[:] Y_indptr - DTYPE_t[:, ::1] Darr + float64_t[:, ::1] Darr X_csr = X.tocsr() n_X, n_features = X_csr.shape X_data = np.asarray(X_csr.data, dtype={{INPUT_DTYPE}}) - X_indices = np.asarray(X_csr.indices, dtype=SPARSE_INDEX_TYPE) - X_indptr = np.asarray(X_csr.indptr, dtype=SPARSE_INDEX_TYPE) + X_indices = np.asarray(X_csr.indices, dtype=np.int32) + X_indptr = np.asarray(X_csr.indptr, dtype=np.int32) if X is Y: - Darr = np.empty((n_X, n_X), dtype=DTYPE, order='C') + Darr = np.empty((n_X, n_X), dtype=np.float64, order='C') self.pdist_csr( x1_data=&X_data[0], x1_indices=X_indices, @@ -668,10 +662,10 @@ cdef class DistanceMetric{{name_suffix}}: Y_csr = Y.tocsr() n_Y, _ = Y_csr.shape Y_data = np.asarray(Y_csr.data, dtype={{INPUT_DTYPE}}) - Y_indices = np.asarray(Y_csr.indices, dtype=SPARSE_INDEX_TYPE) - Y_indptr = np.asarray(Y_csr.indptr, dtype=SPARSE_INDEX_TYPE) + Y_indices = np.asarray(Y_csr.indices, dtype=np.int32) + Y_indptr = np.asarray(Y_csr.indptr, dtype=np.int32) - Darr = np.empty((n_X, n_Y), dtype=DTYPE, order='C') + Darr = np.empty((n_X, n_Y), dtype=np.float64, order='C') self.cdist_csr( x1_data=&X_data[0], x1_indices=X_indices, @@ -686,30 +680,30 @@ cdef class DistanceMetric{{name_suffix}}: def _pairwise_sparse_dense(self, X: csr_matrix, Y): cdef: - ITYPE_t n_X = X.shape[0] - ITYPE_t n_features = X.shape[1] + intp_t n_X = X.shape[0] + intp_t n_features = X.shape[1] const {{INPUT_DTYPE_t}}[:] X_data = np.asarray( X.data, dtype={{INPUT_DTYPE}}, ) - const SPARSE_INDEX_TYPE_t[:] X_indices = np.asarray( - X.indices, dtype=SPARSE_INDEX_TYPE, + const int32_t[:] X_indices = np.asarray( + X.indices, dtype=np.int32, ) - const SPARSE_INDEX_TYPE_t[:] X_indptr = np.asarray( - X.indptr, dtype=SPARSE_INDEX_TYPE, + const int32_t[:] X_indptr = np.asarray( + X.indptr, dtype=np.int32, ) const {{INPUT_DTYPE_t}}[:, ::1] Y_data = np.asarray( Y, dtype={{INPUT_DTYPE}}, order="C", ) - ITYPE_t n_Y = Y_data.shape[0] - const SPARSE_INDEX_TYPE_t[:] Y_indices = ( - np.arange(n_features, dtype=SPARSE_INDEX_TYPE) + intp_t n_Y = Y_data.shape[0] + const int32_t[:] Y_indices = ( + np.arange(n_features, dtype=np.int32) ) - DTYPE_t[:, ::1] Darr = np.empty((n_X, n_Y), dtype=DTYPE, order='C') + float64_t[:, ::1] Darr = np.empty((n_X, n_Y), dtype=np.float64, order='C') - ITYPE_t i1, i2 - ITYPE_t x1_start, x1_end + intp_t i1, i2 + intp_t x1_start, x1_end {{INPUT_DTYPE_t}} * x2_data with nogil: @@ -750,33 +744,33 @@ cdef class DistanceMetric{{name_suffix}}: # swapping argument and by transposing the results, but this would # have come with an extra copy to ensure C-contiguity of the result. cdef: - ITYPE_t n_X = X.shape[0] - ITYPE_t n_features = X.shape[1] + intp_t n_X = X.shape[0] + intp_t n_features = X.shape[1] const {{INPUT_DTYPE_t}}[:, ::1] X_data = np.asarray( X, dtype={{INPUT_DTYPE}}, order="C", ) - const SPARSE_INDEX_TYPE_t[:] X_indices = np.arange( - n_features, dtype=SPARSE_INDEX_TYPE, + const int32_t[:] X_indices = np.arange( + n_features, dtype=np.int32, ) - ITYPE_t n_Y = Y.shape[0] + intp_t n_Y = Y.shape[0] const {{INPUT_DTYPE_t}}[:] Y_data = np.asarray( Y.data, dtype={{INPUT_DTYPE}}, ) - const SPARSE_INDEX_TYPE_t[:] Y_indices = np.asarray( - Y.indices, dtype=SPARSE_INDEX_TYPE, + const int32_t[:] Y_indices = np.asarray( + Y.indices, dtype=np.int32, ) - const SPARSE_INDEX_TYPE_t[:] Y_indptr = np.asarray( - Y.indptr, dtype=SPARSE_INDEX_TYPE, + const int32_t[:] Y_indptr = np.asarray( + Y.indptr, dtype=np.int32, ) - DTYPE_t[:, ::1] Darr = np.empty((n_X, n_Y), dtype=DTYPE, order='C') + float64_t[:, ::1] Darr = np.empty((n_X, n_Y), dtype=np.float64, order='C') - ITYPE_t i1, i2 + intp_t i1, i2 {{INPUT_DTYPE_t}} * x1_data - ITYPE_t x2_start, x2_end + intp_t x2_start, x2_end with nogil: # Use the exact same adaptation for CSR than in SparseDenseDatasetsPair @@ -865,24 +859,24 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def __init__(self): self.p = 2 - cdef inline DTYPE_t dist(self, + cdef inline float64_t dist(self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return euclidean_dist{{name_suffix}}(x1, x2, size) - cdef inline DTYPE_t rdist(self, + cdef inline float64_t rdist(self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return euclidean_rdist{{name_suffix}}(x1, x2, size) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return sqrt(rdist) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: return dist * dist def rdist_to_dist(self, rdist): @@ -891,26 +885,26 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def dist_to_rdist(self, dist): return dist ** 2 - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 - DTYPE_t unsquared = 0 + float64_t d = 0.0 + float64_t unsquared = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -943,17 +937,17 @@ cdef class EuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return sqrt( self.rdist_csr( @@ -978,7 +972,7 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = \sqrt{ \sum_i \frac{ (x_i - y_i) ^ 2}{V_i} } """ def __init__(self, V): - self.vec = np.asarray(V, dtype=DTYPE) + self.vec = np.asarray(V, dtype=np.float64) self.size = self.vec.shape[0] self.p = 2 @@ -986,31 +980,31 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): if X.shape[1] != self.size: raise ValueError('SEuclidean dist: size of V does not match') - cdef inline DTYPE_t rdist( + cdef inline float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t tmp, d=0 - cdef cnp.intp_t j + cdef float64_t tmp, d=0 + cdef intp_t j for j in range(size): tmp = x1[j] - x2[j] d += (tmp * tmp / self.vec[j]) return d - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return sqrt(self.rdist(x1, x2, size)) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return sqrt(rdist) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: return dist * dist def rdist_to_dist(self, rdist): @@ -1019,26 +1013,26 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def dist_to_rdist(self, dist): return dist ** 2 - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 - DTYPE_t unsquared = 0 + float64_t d = 0.0 + float64_t unsquared = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1072,17 +1066,17 @@ cdef class SEuclideanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): i1 = i1 + 1 return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return sqrt( self.rdist_csr( @@ -1109,37 +1103,37 @@ cdef class ManhattanDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def __init__(self): self.p = 1 - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t d = 0 - cdef cnp.intp_t j + cdef float64_t d = 0 + cdef intp_t j for j in range(size): d += fabs(x1[j] - x2[j]) return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1192,38 +1186,38 @@ cdef class ChebyshevDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def __init__(self): self.p = INF{{name_suffix}} - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t d = 0 - cdef cnp.intp_t j + cdef float64_t d = 0 + cdef intp_t j for j in range(size): d = fmax(d, fabs(x1[j] - x2[j])) return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1289,14 +1283,14 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): self.p = p if w is not None: w_array = check_array( - w, ensure_2d=False, dtype=DTYPE, input_name="w" + w, ensure_2d=False, dtype=np.float64, input_name="w" ) if (w_array < 0).any(): raise ValueError("w cannot contain negative weights") self.vec = w_array self.size = self.vec.shape[0] else: - self.vec = np.asarray([], dtype=DTYPE) + self.vec = np.asarray([], dtype=np.float64) self.size = 0 def _validate_data(self, X): @@ -1305,14 +1299,14 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): f"the number of features ({X.shape[1]}). " f"Currently len(w)={self.size}.") - cdef inline DTYPE_t rdist( + cdef inline float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t d=0 - cdef cnp.intp_t j + cdef float64_t d=0 + cdef intp_t j cdef bint has_w = self.size > 0 if has_w: for j in range(size): @@ -1322,18 +1316,18 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): d += (pow(fabs(x1[j] - x2[j]), self.p)) return d - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return pow(self.rdist(x1, x2, size), 1. / self.p) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return pow(rdist, 1. / self.p) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: return pow(dist, self.p) def rdist_to_dist(self, rdist): @@ -1342,25 +1336,25 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def dist_to_rdist(self, dist): return dist ** self.p - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 bint has_w = self.size > 0 if has_w: @@ -1422,17 +1416,17 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return pow( self.rdist_csr( @@ -1485,7 +1479,7 @@ cdef class WMinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): raise ValueError("WMinkowskiDistance requires finite p. " "For p=inf, use ChebyshevDistance.") self.p = p - self.vec = np.asarray(w, dtype=DTYPE) + self.vec = np.asarray(w, dtype=np.float64) self.size = self.vec.shape[0] def _validate_data(self, X): @@ -1493,31 +1487,31 @@ cdef class WMinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): raise ValueError('WMinkowskiDistance dist: ' 'size of w does not match') - cdef inline DTYPE_t rdist( + cdef inline float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t d = 0 - cdef cnp.intp_t j + cdef float64_t d = 0 + cdef intp_t j for j in range(size): d += (pow(self.vec[j] * fabs(x1[j] - x2[j]), self.p)) return d - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return pow(self.rdist(x1, x2, size), 1. / self.p) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return pow(rdist, 1. / self.p) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: return pow(dist, self.p) def rdist_to_dist(self, rdist): @@ -1526,25 +1520,25 @@ cdef class WMinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def dist_to_rdist(self, dist): return dist ** self.p - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1576,17 +1570,17 @@ cdef class WMinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return pow( self.rdist_csr( @@ -1621,7 +1615,7 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): optionally specify the inverse directly. If VI is passed, then V is not referenced. """ - cdef DTYPE_t[::1] buffer + cdef float64_t[::1] buffer def __init__(self, V=None, VI=None): if VI is None: @@ -1632,30 +1626,30 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): if VI.ndim != 2 or VI.shape[0] != VI.shape[1]: raise ValueError("V/VI must be square") - self.mat = np.asarray(VI, dtype=DTYPE, order='C') + self.mat = np.asarray(VI, dtype=np.float64, order='C') self.size = self.mat.shape[0] # We need to create a buffer to store the vectors' coordinates' differences - self.buffer = np.zeros(self.size, dtype=DTYPE) + self.buffer = np.zeros(self.size, dtype=np.float64) def __setstate__(self, state): super().__setstate__(state) self.size = self.mat.shape[0] - self.buffer = np.zeros(self.size, dtype=DTYPE) + self.buffer = np.zeros(self.size, dtype=np.float64) def _validate_data(self, X): if X.shape[1] != self.size: raise ValueError('Mahalanobis dist: size of V does not match') - cdef inline DTYPE_t rdist( + cdef inline float64_t rdist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t tmp, d = 0 - cdef cnp.intp_t i, j + cdef float64_t tmp, d = 0 + cdef intp_t i, j # compute (x1 - x2).T * VI * (x1 - x2) for i in range(size): @@ -1668,18 +1662,18 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): d += tmp * self.buffer[i] return d - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return sqrt(self.rdist(x1, x2, size)) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return sqrt(rdist) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: return dist * dist def rdist_to_dist(self, rdist): @@ -1688,25 +1682,25 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): def dist_to_rdist(self, dist): return dist ** 2 - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t tmp, d = 0.0 + float64_t tmp, d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1742,17 +1736,17 @@ cdef class MahalanobisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return sqrt( self.rdist_csr( @@ -1779,39 +1773,39 @@ cdef class HammingDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): .. math:: D(x, y) = \frac{1}{N} \sum_i \delta_{x_i, y_i} """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int n_unequal = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): if x1[j] != x2[j]: n_unequal += 1 return float(n_unequal) / size - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1854,39 +1848,39 @@ cdef class CanberraDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): .. math:: D(x, y) = \sum_i \frac{|x_i - y_i|}{|x_i| + |y_i|} """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t denom, d = 0 - cdef cnp.intp_t j + cdef float64_t denom, d = 0 + cdef intp_t j for j in range(size): denom = fabs(x1[j]) + fabs(x2[j]) if denom > 0: d += fabs(x1[j] - x2[j]) / denom return d - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t d = 0.0 + float64_t d = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -1929,14 +1923,14 @@ cdef class BrayCurtisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): .. math:: D(x, y) = \frac{\sum_i |x_i - y_i|}{\sum_i(|x_i| + |y_i|)} """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t num = 0, denom = 0 - cdef cnp.intp_t j + cdef float64_t num = 0, denom = 0 + cdef intp_t j for j in range(size): num += fabs(x1[j] - x2[j]) denom += fabs(x1[j]) + fabs(x2[j]) @@ -1945,26 +1939,26 @@ cdef class BrayCurtisDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): else: return 0.0 - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t num = 0.0 - DTYPE_t denom = 0.0 + float64_t num = 0.0 + float64_t denom = 0.0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2009,14 +2003,14 @@ cdef class JaccardDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = (N_TF + N_FT) / (N_TT + N_TF + N_FT) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_eq = 0, nnz = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 @@ -2029,25 +2023,25 @@ cdef class JaccardDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): return 0 return (nnz - n_eq) * 1.0 / nnz - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_tt = 0, nnz = 0 + intp_t tf1, tf2, n_tt = 0, nnz = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2098,39 +2092,39 @@ cdef class MatchingDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = (N_TF + N_FT) / N """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_neq = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 n_neq += (tf1 != tf2) return n_neq * 1. / size - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_neq = 0 + intp_t tf1, tf2, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2173,14 +2167,14 @@ cdef class DiceDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = (N_TF + N_FT) / (2 * N_TT + N_TF + N_FT) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_neq = 0, n_tt = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 @@ -2188,25 +2182,25 @@ cdef class DiceDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): n_neq += (tf1 != tf2) return n_neq / (2.0 * n_tt + n_neq) - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_tt = 0, n_neq = 0 + intp_t tf1, tf2, n_tt = 0, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2254,14 +2248,14 @@ cdef class KulsinskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = 1 - N_TT / (N + N_TF + N_FT) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_tt = 0, n_neq = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 @@ -2269,25 +2263,25 @@ cdef class KulsinskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): n_tt += (tf1 and tf2) return (n_neq - n_tt + size) * 1.0 / (n_neq + size) - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_tt = 0, n_neq = 0 + intp_t tf1, tf2, n_tt = 0, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2333,39 +2327,39 @@ cdef class RogersTanimotoDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = 2 (N_TF + N_FT) / (N + N_TF + N_FT) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_neq = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 n_neq += (tf1 != tf2) return (2.0 * n_neq) / (size + n_neq) - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_neq = 0 + intp_t tf1, tf2, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2410,39 +2404,39 @@ cdef class RussellRaoDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = (N - N_TT) / N """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_tt = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 n_tt += (tf1 and tf2) return (size - n_tt) * 1. / size - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_tt = 0 + intp_t tf1, tf2, n_tt = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2480,39 +2474,39 @@ cdef class SokalMichenerDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = 2 (N_TF + N_FT) / (N + N_TF + N_FT) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_neq = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 n_neq += (tf1 != tf2) return (2.0 * n_neq) / (size + n_neq) - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_neq = 0 + intp_t tf1, tf2, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2557,14 +2551,14 @@ cdef class SokalSneathDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): D(x, y) = (N_TF + N_FT) / (N_TT / 2 + N_FT + N_TF) """ - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: cdef int tf1, tf2, n_tt = 0, n_neq = 0 - cdef cnp.intp_t j + cdef intp_t j for j in range(size): tf1 = x1[j] != 0 tf2 = x2[j] != 0 @@ -2572,25 +2566,25 @@ cdef class SokalSneathDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): n_tt += (tf1 and tf2) return n_neq / (0.5 * n_tt + n_neq) - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - ITYPE_t tf1, tf2, n_tt = 0, n_neq = 0 + intp_t tf1, tf2, n_tt = 0, n_neq = 0 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2646,27 +2640,27 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): raise ValueError("Haversine distance only valid " "in 2 dimensions") - cdef inline DTYPE_t rdist(self, + cdef inline float64_t rdist(self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: - cdef DTYPE_t sin_0 = sin(0.5 * ((x1[0]) - (x2[0]))) - cdef DTYPE_t sin_1 = sin(0.5 * ((x1[1]) - (x2[1]))) + cdef float64_t sin_0 = sin(0.5 * ((x1[0]) - (x2[0]))) + cdef float64_t sin_1 = sin(0.5 * ((x1[1]) - (x2[1]))) return (sin_0 * sin_0 + cos(x1[0]) * cos(x2[0]) * sin_1 * sin_1) - cdef inline DTYPE_t dist(self, + cdef inline float64_t dist(self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return 2 * asin(sqrt(self.rdist(x1, x2, size))) - cdef inline DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: + cdef inline float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil: return 2 * asin(sqrt(rdist)) - cdef inline DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: - cdef DTYPE_t tmp = sin(0.5 * dist) + cdef inline float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil: + cdef float64_t tmp = sin(0.5 * dist) return tmp * tmp def rdist_to_dist(self, rdist): @@ -2676,17 +2670,17 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): tmp = np.sin(0.5 * dist) return tmp * tmp - cdef inline DTYPE_t dist_csr( + cdef inline float64_t dist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: return 2 * asin(sqrt(self.rdist_csr( x1_data, @@ -2700,30 +2694,30 @@ cdef class HaversineDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): size, ))) - cdef inline DTYPE_t rdist_csr( + cdef inline float64_t rdist_csr( self, const {{INPUT_DTYPE_t}}* x1_data, - const SPARSE_INDEX_TYPE_t[:] x1_indices, + const int32_t[:] x1_indices, const {{INPUT_DTYPE_t}}* x2_data, - const SPARSE_INDEX_TYPE_t[:] x2_indices, - const SPARSE_INDEX_TYPE_t x1_start, - const SPARSE_INDEX_TYPE_t x1_end, - const SPARSE_INDEX_TYPE_t x2_start, - const SPARSE_INDEX_TYPE_t x2_end, - const ITYPE_t size, + const int32_t[:] x2_indices, + const int32_t x1_start, + const int32_t x1_end, + const int32_t x2_start, + const int32_t x2_end, + const intp_t size, ) except -1 nogil: cdef: - cnp.npy_intp ix1, ix2 - cnp.npy_intp i1 = x1_start - cnp.npy_intp i2 = x2_start + intp_t ix1, ix2 + intp_t i1 = x1_start + intp_t i2 = x2_start - DTYPE_t x1_0 = 0 - DTYPE_t x1_1 = 0 - DTYPE_t x2_0 = 0 - DTYPE_t x2_1 = 0 - DTYPE_t sin_0 - DTYPE_t sin_1 + float64_t x1_0 = 0 + float64_t x1_1 = 0 + float64_t x2_0 = 0 + float64_t x2_1 = 0 + float64_t sin_0 + float64_t sin_1 while i1 < x1_end and i2 < x2_end: ix1 = x1_indices[i1] @@ -2792,19 +2786,19 @@ cdef class PyFuncDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): # allowed in cython >= 0.26 since it is a redundant GIL acquisition. The # only way to be back compatible is to inherit `dist` from the base class # without GIL and called an inline `_dist` which acquire GIL. - cdef inline DTYPE_t dist( + cdef inline float64_t dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 nogil: return self._dist(x1, x2, size) - cdef inline DTYPE_t _dist( + cdef inline float64_t _dist( self, const {{INPUT_DTYPE_t}}* x1, const {{INPUT_DTYPE_t}}* x2, - ITYPE_t size, + intp_t size, ) except -1 with gil: cdef: object x1arr = _buffer_to_ndarray{{name_suffix}}(x1, size) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx.tp index a7744ab6e6b8f..3d0ea84b0091d 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_argkmin_classmode.pyx.tp @@ -3,10 +3,6 @@ from cython.parallel cimport parallel, prange from libcpp.map cimport map as cpp_map, pair as cpp_pair from libc.stdlib cimport free -cimport numpy as cnp - -cnp.import_array() - from ...utils._typedefs cimport intp_t, float64_t import numpy as np diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp index 416263d1c3134..23337cb2b59d6 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pxd.tp @@ -7,14 +7,12 @@ implementation_specific_values = [ # # We use DistanceMetric for float64 for backward naming compatibility. # - ('64', 'DistanceMetric', 'DTYPE_t'), - ('32', 'DistanceMetric32', 'cnp.float32_t') + ('64', 'DistanceMetric', 'float64_t'), + ('32', 'DistanceMetric32', 'float32_t') ] }} -cimport numpy as cnp - -from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t from ...metrics._dist_metrics cimport DistanceMetric, DistanceMetric32 {{for name_suffix, DistanceMetric, INPUT_DTYPE_t in implementation_specific_values}} @@ -23,15 +21,15 @@ from ...metrics._dist_metrics cimport DistanceMetric, DistanceMetric32 cdef class DatasetsPair{{name_suffix}}: cdef: {{DistanceMetric}} distance_metric - ITYPE_t n_features + intp_t n_features - cdef ITYPE_t n_samples_X(self) noexcept nogil + cdef intp_t n_samples_X(self) noexcept nogil - cdef ITYPE_t n_samples_Y(self) noexcept nogil + cdef intp_t n_samples_Y(self) noexcept nogil - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil cdef class DenseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): @@ -43,23 +41,23 @@ cdef class DenseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef: const {{INPUT_DTYPE_t}}[:] X_data - const SPARSE_INDEX_TYPE_t[:] X_indices - const SPARSE_INDEX_TYPE_t[:] X_indptr + const int32_t[:] X_indices + const int32_t[:] X_indptr const {{INPUT_DTYPE_t}}[:] Y_data - const SPARSE_INDEX_TYPE_t[:] Y_indices - const SPARSE_INDEX_TYPE_t[:] Y_indptr + const int32_t[:] Y_indices + const int32_t[:] Y_indptr cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): cdef: const {{INPUT_DTYPE_t}}[:] X_data - const SPARSE_INDEX_TYPE_t[:] X_indices - const SPARSE_INDEX_TYPE_t[:] X_indptr + const int32_t[:] X_indices + const int32_t[:] X_indptr const {{INPUT_DTYPE_t}}[:] Y_data - const SPARSE_INDEX_TYPE_t[:] Y_indices - ITYPE_t n_Y + const int32_t[:] Y_indices + intp_t n_Y cdef class DenseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): diff --git a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp index 5442d2883ac5b..5569c1f231d62 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_datasets_pair.pyx.tp @@ -7,23 +7,20 @@ implementation_specific_values = [ # # We use DistanceMetric for float64 for backward naming compatibility. # - ('64', 'DistanceMetric', 'DTYPE_t', 'DTYPE'), - ('32', 'DistanceMetric32', 'cnp.float32_t', 'np.float32') + ('64', 'DistanceMetric', 'float64_t', 'np.float64'), + ('32', 'DistanceMetric32', 'float32_t', 'np.float32') ] }} import numpy as np -cimport numpy as cnp from cython cimport final -from ...utils._typedefs cimport DTYPE_t, ITYPE_t +from ...utils._typedefs cimport float64_t, float32_t, intp_t from ...metrics._dist_metrics cimport DistanceMetric from scipy.sparse import issparse, csr_matrix -from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE -cnp.import_array() {{for name_suffix, DistanceMetric, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} cdef class DatasetsPair{{name_suffix}}: @@ -124,34 +121,34 @@ cdef class DatasetsPair{{name_suffix}}: @classmethod def unpack_csr_matrix(cls, X: csr_matrix): - """Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE.""" + """Ensure that the CSR matrix is indexed with np.int32.""" X_data = np.asarray(X.data, dtype={{INPUT_DTYPE}}) - X_indices = np.asarray(X.indices, dtype=SPARSE_INDEX_TYPE) - X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE) + X_indices = np.asarray(X.indices, dtype=np.int32) + X_indptr = np.asarray(X.indptr, dtype=np.int32) return X_data, X_indices, X_indptr - def __init__(self, {{DistanceMetric}} distance_metric, ITYPE_t n_features): + def __init__(self, {{DistanceMetric}} distance_metric, intp_t n_features): self.distance_metric = distance_metric self.n_features = n_features - cdef ITYPE_t n_samples_X(self) noexcept nogil: + cdef intp_t n_samples_X(self) noexcept nogil: """Number of samples in X.""" # This is a abstract method. # This _must_ always be overwritten in subclasses. # TODO: add "with gil: raise" here when supporting Cython 3.0 return -999 - cdef ITYPE_t n_samples_Y(self) noexcept nogil: + cdef intp_t n_samples_Y(self) noexcept nogil: """Number of samples in Y.""" # This is a abstract method. # This _must_ always be overwritten in subclasses. # TODO: add "with gil: raise" here when supporting Cython 3.0 return -999 - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.dist(i, j) - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: # This is a abstract method. # This _must_ always be overwritten in subclasses. # TODO: add "with gil: raise" here when supporting Cython 3.0 @@ -186,19 +183,19 @@ cdef class DenseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): self.Y = Y @final - cdef ITYPE_t n_samples_X(self) noexcept nogil: + cdef intp_t n_samples_X(self) noexcept nogil: return self.X.shape[0] @final - cdef ITYPE_t n_samples_Y(self) noexcept nogil: + cdef intp_t n_samples_Y(self) noexcept nogil: return self.Y.shape[0] @final - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.rdist(&self.X[i, 0], &self.Y[j, 0], self.n_features) @final - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.dist(&self.X[i, 0], &self.Y[j, 0], self.n_features) @@ -226,15 +223,15 @@ cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): self.Y_data, self.Y_indices, self.Y_indptr = self.unpack_csr_matrix(Y) @final - cdef ITYPE_t n_samples_X(self) noexcept nogil: + cdef intp_t n_samples_X(self) noexcept nogil: return self.X_indptr.shape[0] - 1 @final - cdef ITYPE_t n_samples_Y(self) noexcept nogil: + cdef intp_t n_samples_Y(self) noexcept nogil: return self.Y_indptr.shape[0] - 1 @final - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.rdist_csr( x1_data=&self.X_data[0], x1_indices=self.X_indices, @@ -248,7 +245,7 @@ cdef class SparseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): ) @final - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.dist_csr( x1_data=&self.X_data[0], x1_indices=self.X_indices, @@ -316,18 +313,18 @@ cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): # Y array already has been checked here self.n_Y = Y.shape[0] self.Y_data = np.ravel(Y) - self.Y_indices = np.arange(self.n_features, dtype=SPARSE_INDEX_TYPE) + self.Y_indices = np.arange(self.n_features, dtype=np.int32) @final - cdef ITYPE_t n_samples_X(self) noexcept nogil: + cdef intp_t n_samples_X(self) noexcept nogil: return self.X_indptr.shape[0] - 1 @final - cdef ITYPE_t n_samples_Y(self) noexcept nogil: + cdef intp_t n_samples_Y(self) noexcept nogil: return self.n_Y @final - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.rdist_csr( x1_data=&self.X_data[0], x1_indices=self.X_indices, @@ -343,7 +340,7 @@ cdef class SparseDenseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): ) @final - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: return self.distance_metric.dist_csr( x1_data=&self.X_data[0], @@ -383,22 +380,22 @@ cdef class DenseSparseDatasetsPair{{name_suffix}}(DatasetsPair{{name_suffix}}): self.datasets_pair = SparseDenseDatasetsPair{{name_suffix}}(Y, X, distance_metric) @final - cdef ITYPE_t n_samples_X(self) noexcept nogil: + cdef intp_t n_samples_X(self) noexcept nogil: # Swapping interface return self.datasets_pair.n_samples_Y() @final - cdef ITYPE_t n_samples_Y(self) noexcept nogil: + cdef intp_t n_samples_Y(self) noexcept nogil: # Swapping interface return self.datasets_pair.n_samples_X() @final - cdef DTYPE_t surrogate_dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t surrogate_dist(self, intp_t i, intp_t j) noexcept nogil: # Swapping arguments on the same interface return self.datasets_pair.surrogate_dist(j, i) @final - cdef DTYPE_t dist(self, ITYPE_t i, ITYPE_t j) noexcept nogil: + cdef float64_t dist(self, intp_t i, intp_t j) noexcept nogil: # Swapping arguments on the same interface return self.datasets_pair.dist(j, i) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp index 6b116f0f44d6f..bdf007bd0514a 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp @@ -8,30 +8,28 @@ implementation_specific_values = [ # We also use the float64 dtype and C-type names as defined in # `sklearn.utils._typedefs` to maintain consistency. # - ('64', False, 'DTYPE_t', 'DTYPE'), - ('32', True, 'cnp.float32_t', 'np.float32') + ('64', False, 'float64_t', 'np.float64'), + ('32', True, 'float32_t', 'np.float32') ] }} -cimport numpy as cnp - from libcpp.vector cimport vector -from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t cdef void _middle_term_sparse_sparse_64( - const DTYPE_t[:] X_data, - const SPARSE_INDEX_TYPE_t[:] X_indices, - const SPARSE_INDEX_TYPE_t[:] X_indptr, - ITYPE_t X_start, - ITYPE_t X_end, - const DTYPE_t[:] Y_data, - const SPARSE_INDEX_TYPE_t[:] Y_indices, - const SPARSE_INDEX_TYPE_t[:] Y_indptr, - ITYPE_t Y_start, - ITYPE_t Y_end, - DTYPE_t * D, + const float64_t[:] X_data, + const int32_t[:] X_indices, + const int32_t[:] X_indptr, + intp_t X_start, + intp_t X_end, + const float64_t[:] Y_data, + const int32_t[:] Y_indices, + const int32_t[:] Y_indptr, + intp_t Y_start, + intp_t Y_end, + float64_t * D, ) noexcept nogil @@ -40,58 +38,58 @@ cdef void _middle_term_sparse_sparse_64( cdef class MiddleTermComputer{{name_suffix}}: cdef: - ITYPE_t effective_n_threads - ITYPE_t chunks_n_threads - ITYPE_t dist_middle_terms_chunks_size - ITYPE_t n_features - ITYPE_t chunk_size + intp_t effective_n_threads + intp_t chunks_n_threads + intp_t dist_middle_terms_chunks_size + intp_t n_features + intp_t chunk_size # Buffers for the `-2 * X_c @ Y_c.T` term computed via GEMM - vector[vector[DTYPE_t]] dist_middle_terms_chunks + vector[vector[float64_t]] dist_middle_terms_chunks cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil - cdef void _parallel_on_X_parallel_init(self, ITYPE_t thread_num) noexcept nogil + cdef void _parallel_on_X_parallel_init(self, intp_t thread_num) noexcept nogil cdef void _parallel_on_X_init_chunk( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil cdef void _parallel_on_Y_init(self) noexcept nogil cdef void _parallel_on_Y_parallel_init( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil @@ -102,95 +100,95 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ {{if upcast_to_float64}} # Buffers for upcasting chunks of X and Y from 32bit to 64bit - vector[vector[DTYPE_t]] X_c_upcast - vector[vector[DTYPE_t]] Y_c_upcast + vector[vector[float64_t]] X_c_upcast + vector[vector[float64_t]] Y_c_upcast {{endif}} cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil cdef void _parallel_on_X_init_chunk( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil cdef void _parallel_on_Y_parallel_init( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}): cdef: - const DTYPE_t[:] X_data - const SPARSE_INDEX_TYPE_t[:] X_indices - const SPARSE_INDEX_TYPE_t[:] X_indptr + const float64_t[:] X_data + const int32_t[:] X_indices + const int32_t[:] X_indptr - const DTYPE_t[:] Y_data - const SPARSE_INDEX_TYPE_t[:] Y_indices - const SPARSE_INDEX_TYPE_t[:] Y_indptr + const float64_t[:] Y_data + const int32_t[:] Y_indices + const int32_t[:] Y_indptr cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}): cdef: - const DTYPE_t[:] X_data - const SPARSE_INDEX_TYPE_t[:] X_indices - const SPARSE_INDEX_TYPE_t[:] X_indptr + const float64_t[:] X_data + const int32_t[:] X_indices + const int32_t[:] X_indptr const {{INPUT_DTYPE_t}}[:, ::1] Y @@ -202,29 +200,29 @@ cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil {{endfor}} diff --git a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp index 255efc83565d5..f2d89ed65909c 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp @@ -8,13 +8,11 @@ implementation_specific_values = [ # We also use the float64 dtype and C-type names as defined in # `sklearn.utils._typedefs` to maintain consistency. # - ('64', False, 'DTYPE_t', 'DTYPE'), - ('32', True, 'cnp.float32_t', 'np.float32') + ('64', False, 'float64_t', 'np.float64'), + ('32', True, 'float32_t', 'np.float32') ] }} -cimport numpy as cnp - from libcpp.vector cimport vector from ...utils._cython_blas cimport ( @@ -25,7 +23,7 @@ from ...utils._cython_blas cimport ( Trans, _gemm, ) -from ...utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t +from ...utils._typedefs cimport float64_t, float32_t, int32_t, intp_t # TODO: change for `libcpp.algorithm.fill` once Cython 3 is used # Introduction in Cython: @@ -36,29 +34,29 @@ cdef extern from "" namespace "std" nogil: import numpy as np from scipy.sparse import issparse, csr_matrix -from ...utils._typedefs import DTYPE, SPARSE_INDEX_TYPE + cdef void _middle_term_sparse_sparse_64( - const DTYPE_t[:] X_data, - const SPARSE_INDEX_TYPE_t[:] X_indices, - const SPARSE_INDEX_TYPE_t[:] X_indptr, - ITYPE_t X_start, - ITYPE_t X_end, - const DTYPE_t[:] Y_data, - const SPARSE_INDEX_TYPE_t[:] Y_indices, - const SPARSE_INDEX_TYPE_t[:] Y_indptr, - ITYPE_t Y_start, - ITYPE_t Y_end, - DTYPE_t * D, + const float64_t[:] X_data, + const int32_t[:] X_indices, + const int32_t[:] X_indptr, + intp_t X_start, + intp_t X_end, + const float64_t[:] Y_data, + const int32_t[:] Y_indices, + const int32_t[:] Y_indptr, + intp_t Y_start, + intp_t Y_end, + float64_t * D, ) noexcept nogil: # This routine assumes that D points to the first element of a # zeroed buffer of length at least equal to n_X × n_Y, conceptually # representing a 2-d C-ordered array. cdef: - ITYPE_t i, j, k - ITYPE_t n_X = X_end - X_start - ITYPE_t n_Y = Y_end - Y_start - ITYPE_t x_col, x_ptr, y_col, y_ptr + intp_t i, j, k + intp_t n_X = X_end - X_start + intp_t n_Y = Y_end - Y_start + intp_t x_col, x_ptr, y_col, y_ptr for i in range(n_X): for x_ptr in range(X_indptr[X_start+i], X_indptr[X_start+i+1]): @@ -74,25 +72,25 @@ cdef void _middle_term_sparse_sparse_64( {{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} cdef void _middle_term_sparse_dense_{{name_suffix}}( - const DTYPE_t[:] X_data, - const SPARSE_INDEX_TYPE_t[:] X_indices, - const SPARSE_INDEX_TYPE_t[:] X_indptr, - ITYPE_t X_start, - ITYPE_t X_end, + const float64_t[:] X_data, + const int32_t[:] X_indices, + const int32_t[:] X_indptr, + intp_t X_start, + intp_t X_end, const {{INPUT_DTYPE_t}}[:, ::1] Y, - ITYPE_t Y_start, - ITYPE_t Y_end, + intp_t Y_start, + intp_t Y_end, bint c_ordered_middle_term, - DTYPE_t * dist_middle_terms, + float64_t * dist_middle_terms, ) nogil: # This routine assumes that dist_middle_terms is a pointer to the first element # of a buffer filled with zeros of length at least equal to n_X × n_Y, conceptually # representing a 2-d C-ordered of F-ordered array. cdef: - ITYPE_t i, j, k - ITYPE_t n_X = X_end - X_start - ITYPE_t n_Y = Y_end - Y_start - ITYPE_t X_i_col_idx, X_i_ptr, Y_j_col_idx, Y_j_ptr + intp_t i, j, k + intp_t n_X = X_end - X_start + intp_t n_Y = Y_end - Y_start + intp_t X_i_col_idx, X_i_ptr, Y_j_col_idx, Y_j_ptr for i in range(n_X): for j in range(n_Y): @@ -206,19 +204,19 @@ cdef class MiddleTermComputer{{name_suffix}}: @classmethod def unpack_csr_matrix(cls, X: csr_matrix): - """Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE.""" - X_data = np.asarray(X.data, dtype=DTYPE) - X_indices = np.asarray(X.indices, dtype=SPARSE_INDEX_TYPE) - X_indptr = np.asarray(X.indptr, dtype=SPARSE_INDEX_TYPE) + """Ensure that the CSR matrix is indexed with np.int32.""" + X_data = np.asarray(X.data, dtype=np.float64) + X_indices = np.asarray(X.indices, dtype=np.int32) + X_indptr = np.asarray(X.indptr, dtype=np.int32) return X_data, X_indices, X_indptr def __init__( self, - ITYPE_t effective_n_threads, - ITYPE_t chunks_n_threads, - ITYPE_t dist_middle_terms_chunks_size, - ITYPE_t n_features, - ITYPE_t chunk_size, + intp_t effective_n_threads, + intp_t chunks_n_threads, + intp_t dist_middle_terms_chunks_size, + intp_t n_features, + intp_t chunk_size, ): self.effective_n_threads = effective_n_threads self.chunks_n_threads = chunks_n_threads @@ -226,26 +224,26 @@ cdef class MiddleTermComputer{{name_suffix}}: self.n_features = n_features self.chunk_size = chunk_size - self.dist_middle_terms_chunks = vector[vector[DTYPE_t]](self.effective_n_threads) + self.dist_middle_terms_chunks = vector[vector[float64_t]](self.effective_n_threads) cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: return - cdef void _parallel_on_X_parallel_init(self, ITYPE_t thread_num) noexcept nogil: + cdef void _parallel_on_X_parallel_init(self, intp_t thread_num) noexcept nogil: self.dist_middle_terms_chunks[thread_num].resize(self.dist_middle_terms_chunks_size) cdef void _parallel_on_X_init_chunk( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil: return @@ -257,29 +255,29 @@ cdef class MiddleTermComputer{{name_suffix}}: cdef void _parallel_on_Y_parallel_init( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil: return cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil: return - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: return NULL @@ -298,11 +296,11 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ self, const {{INPUT_DTYPE_t}}[:, ::1] X, const {{INPUT_DTYPE_t}}[:, ::1] Y, - ITYPE_t effective_n_threads, - ITYPE_t chunks_n_threads, - ITYPE_t dist_middle_terms_chunks_size, - ITYPE_t n_features, - ITYPE_t chunk_size, + intp_t effective_n_threads, + intp_t chunks_n_threads, + intp_t dist_middle_terms_chunks_size, + intp_t n_features, + intp_t chunk_size, ): super().__init__( effective_n_threads, @@ -316,8 +314,8 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ {{if upcast_to_float64}} # We populate the buffer for upcasting chunks of X and Y from float32 to float64. - self.X_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) - self.Y_c_upcast = vector[vector[DTYPE_t]](self.effective_n_threads) + self.X_c_upcast = vector[vector[float64_t]](self.effective_n_threads) + self.Y_c_upcast = vector[vector[float64_t]](self.effective_n_threads) upcast_buffer_n_elements = self.chunk_size * n_features @@ -328,94 +326,94 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: {{if upcast_to_float64}} cdef: - ITYPE_t i, j - ITYPE_t n_chunk_samples = Y_end - Y_start + intp_t i, j + intp_t n_chunk_samples = Y_end - Y_start # Upcasting Y_c=Y[Y_start:Y_end, :] from float32 to float64 for i in range(n_chunk_samples): for j in range(self.n_features): - self.Y_c_upcast[thread_num][i * self.n_features + j] = self.Y[Y_start + i, j] + self.Y_c_upcast[thread_num][i * self.n_features + j] = self.Y[Y_start + i, j] {{else}} return {{endif}} cdef void _parallel_on_X_init_chunk( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil: {{if upcast_to_float64}} cdef: - ITYPE_t i, j - ITYPE_t n_chunk_samples = X_end - X_start + intp_t i, j + intp_t n_chunk_samples = X_end - X_start # Upcasting X_c=X[X_start:X_end, :] from float32 to float64 for i in range(n_chunk_samples): for j in range(self.n_features): - self.X_c_upcast[thread_num][i * self.n_features + j] = self.X[X_start + i, j] + self.X_c_upcast[thread_num][i * self.n_features + j] = self.X[X_start + i, j] {{else}} return {{endif}} cdef void _parallel_on_Y_parallel_init( self, - ITYPE_t thread_num, - ITYPE_t X_start, - ITYPE_t X_end, + intp_t thread_num, + intp_t X_start, + intp_t X_end, ) noexcept nogil: {{if upcast_to_float64}} cdef: - ITYPE_t i, j - ITYPE_t n_chunk_samples = X_end - X_start + intp_t i, j + intp_t n_chunk_samples = X_end - X_start # Upcasting X_c=X[X_start:X_end, :] from float32 to float64 for i in range(n_chunk_samples): for j in range(self.n_features): - self.X_c_upcast[thread_num][i * self.n_features + j] = self.X[X_start + i, j] + self.X_c_upcast[thread_num][i * self.n_features + j] = self.X[X_start + i, j] {{else}} return {{endif}} cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num ) noexcept nogil: {{if upcast_to_float64}} cdef: - ITYPE_t i, j - ITYPE_t n_chunk_samples = Y_end - Y_start + intp_t i, j + intp_t n_chunk_samples = Y_end - Y_start # Upcasting Y_c=Y[Y_start:Y_end, :] from float32 to float64 for i in range(n_chunk_samples): for j in range(self.n_features): - self.Y_c_upcast[thread_num][i * self.n_features + j] = self.Y[Y_start + i, j] + self.Y_c_upcast[thread_num][i * self.n_features + j] = self.Y[Y_start + i, j] {{else}} return {{endif}} - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: cdef: - DTYPE_t *dist_middle_terms = self.dist_middle_terms_chunks[thread_num].data() + float64_t *dist_middle_terms = self.dist_middle_terms_chunks[thread_num].data() # Careful: LDA, LDB and LDC are given for F-ordered arrays # in BLAS documentations, for instance: @@ -425,24 +423,24 @@ cdef class DenseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_ BLAS_Order order = RowMajor BLAS_Trans ta = NoTrans BLAS_Trans tb = Trans - ITYPE_t m = X_end - X_start - ITYPE_t n = Y_end - Y_start - ITYPE_t K = self.n_features - DTYPE_t alpha = - 2. + intp_t m = X_end - X_start + intp_t n = Y_end - Y_start + intp_t K = self.n_features + float64_t alpha = - 2. {{if upcast_to_float64}} - DTYPE_t * A = self.X_c_upcast[thread_num].data() - DTYPE_t * B = self.Y_c_upcast[thread_num].data() + float64_t * A = self.X_c_upcast[thread_num].data() + float64_t * B = self.Y_c_upcast[thread_num].data() {{else}} # Casting for A and B to remove the const is needed because APIs exposed via # scipy.linalg.cython_blas aren't reflecting the arguments' const qualifier. # See: https://github.com/scipy/scipy/issues/14262 - DTYPE_t * A = &self.X[X_start, 0] - DTYPE_t * B = &self.Y[Y_start, 0] + float64_t * A = &self.X[X_start, 0] + float64_t * B = &self.Y[Y_start, 0] {{endif}} - ITYPE_t lda = self.n_features - ITYPE_t ldb = self.n_features - DTYPE_t beta = 0. - ITYPE_t ldc = Y_end - Y_start + intp_t lda = self.n_features + intp_t ldb = self.n_features + float64_t beta = 0. + intp_t ldc = Y_end - Y_start # dist_middle_terms = `-2 * X[X_start:X_end] @ Y[Y_start:Y_end].T` _gemm(order, ta, tb, m, n, K, alpha, A, lda, B, ldb, beta, dist_middle_terms, ldc) @@ -466,11 +464,11 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam self, X, Y, - ITYPE_t effective_n_threads, - ITYPE_t chunks_n_threads, - ITYPE_t dist_middle_terms_chunks_size, - ITYPE_t n_features, - ITYPE_t chunk_size, + intp_t effective_n_threads, + intp_t chunks_n_threads, + intp_t dist_middle_terms_chunks_size, + intp_t n_features, + intp_t chunk_size, ): super().__init__( effective_n_threads, @@ -484,11 +482,11 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: # Flush the thread dist_middle_terms_chunks to 0.0 fill( @@ -499,11 +497,11 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: # Flush the thread dist_middle_terms_chunks to 0.0 fill( @@ -512,16 +510,16 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam 0.0, ) - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: cdef: - DTYPE_t *dist_middle_terms = ( + float64_t *dist_middle_terms = ( self.dist_middle_terms_chunks[thread_num].data() ) @@ -553,11 +551,11 @@ cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name self, X, Y, - ITYPE_t effective_n_threads, - ITYPE_t chunks_n_threads, - ITYPE_t dist_middle_terms_chunks_size, - ITYPE_t n_features, - ITYPE_t chunk_size, + intp_t effective_n_threads, + intp_t chunks_n_threads, + intp_t dist_middle_terms_chunks_size, + intp_t n_features, + intp_t chunk_size, bint c_ordered_middle_term, ): super().__init__( @@ -573,11 +571,11 @@ cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: # Fill the thread's dist_middle_terms_chunks with 0.0 before # computing its elements in _compute_dist_middle_terms. @@ -589,11 +587,11 @@ cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: # Fill the thread's dist_middle_terms_chunks with 0.0 before # computing its elements in _compute_dist_middle_terms. @@ -603,16 +601,16 @@ cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name 0.0, ) - cdef DTYPE_t * _compute_dist_middle_terms( + cdef float64_t * _compute_dist_middle_terms( self, - ITYPE_t X_start, - ITYPE_t X_end, - ITYPE_t Y_start, - ITYPE_t Y_end, - ITYPE_t thread_num, + intp_t X_start, + intp_t X_end, + intp_t Y_start, + intp_t Y_end, + intp_t thread_num, ) noexcept nogil: cdef: - DTYPE_t *dist_middle_terms = ( + float64_t *dist_middle_terms = ( self.dist_middle_terms_chunks[thread_num].data() ) diff --git a/sklearn/utils/_typedefs.pxd b/sklearn/utils/_typedefs.pxd index 7baed97c1ef03..f803b20e2efec 100644 --- a/sklearn/utils/_typedefs.pxd +++ b/sklearn/utils/_typedefs.pxd @@ -1,6 +1,3 @@ -#!python -cimport numpy as cnp - # Commonly used types # These are redefinitions of the ones defined by numpy in # https://github.com/numpy/numpy/blob/main/numpy/__init__.pxd @@ -8,7 +5,6 @@ cimport numpy as cnp # https://github.com/cython/cython/blob/master/Cython/Includes/numpy/__init__.pxd. # It will eventually avoid having to always include the numpy headers even when we # would only use it for the types. -# TODO: don't cimport numpy in this extension. # # When used to declare variables that will receive values from numpy arrays, it # should match the dtype of the array. For example, to declare a variable that will @@ -18,31 +14,14 @@ cimport numpy as cnp # TODO: Stop defining custom types locally or globally like DTYPE_t and friends and # use these consistently throughout the codebase. # NOTE: Extend this list as needed when converting more cython extensions. -ctypedef unsigned char bool_t +ctypedef unsigned char uint8_t ctypedef Py_ssize_t intp_t ctypedef float float32_t ctypedef double float64_t +# Sparse matrices indices and indices' pointers arrays must use int32_t over +# intp_t because intp_t is platform dependent. +# When large sparse matrices are supported, indexing must use int64_t. +# See https://github.com/scikit-learn/scikit-learn/issues/23653 which tracks the +# ongoing work to support large sparse matrices. ctypedef signed int int32_t ctypedef signed long long int64_t - - -# Floating point/data type -ctypedef cnp.float64_t DTYPE_t # WARNING: should match DTYPE in typedefs.pyx - -cdef enum: - DTYPECODE = cnp.NPY_FLOAT64 - -# Index/integer type. -# WARNING: ITYPE_t must be a signed integer type or you will have a bad time! -ctypedef cnp.intp_t ITYPE_t # WARNING: should match ITYPE in typedefs.pyx - -# scipy matrices indices dtype (namely for indptr and indices arrays) -# -# Note that indices might need to be represented as cnp.int64_t. -# Currently, we use Cython classes which do not handle fused types -# so we hardcode this type to cnp.int32_t, supporting all but edge -# cases. -# -# TODO: support cnp.int64_t for this case -# See: https://github.com/scikit-learn/scikit-learn/issues/23653 -ctypedef cnp.int32_t SPARSE_INDEX_TYPE_t diff --git a/sklearn/utils/_typedefs.pyx b/sklearn/utils/_typedefs.pyx index faf9bd386beca..22e18cdae8d2e 100644 --- a/sklearn/utils/_typedefs.pyx +++ b/sklearn/utils/_typedefs.pyx @@ -1,20 +1,20 @@ -#!python +# _typedefs is a declaration only module +# +# The functions implemented here are for testing purpose only. + import numpy as np -# use a hack to determine the associated numpy data types -# NOTE: the following requires the buffer interface, only available in -# numpy 1.5+. We'll choose the DTYPE by hand instead. -#cdef ITYPE_t idummy -#cdef ITYPE_t[:] idummy_view = &idummy -#ITYPE = np.asarray(idummy_view).dtype -ITYPE = np.intp # WARNING: this should match ITYPE_t in typedefs.pxd +ctypedef fused testing_type_t: + uint8_t + intp_t + float32_t + float64_t + int32_t + int64_t -#cdef DTYPE_t ddummy -#cdef DTYPE_t[:] ddummy_view = &ddummy -#DTYPE = np.asarray(ddummy_view).dtype -DTYPE = np.float64 # WARNING: this should match DTYPE_t in typedefs.pxd -# WARNING: this must match SPARSE_INDEX_TYPE_t in typedefs.pxd -SPARSE_INDEX_TYPE = np.int32 +def testing_make_array_from_typed_val(testing_type_t val): + cdef testing_type_t[:] val_view = &val + return np.asarray(val_view) diff --git a/sklearn/utils/tests/test_typedefs.py b/sklearn/utils/tests/test_typedefs.py new file mode 100644 index 0000000000000..f49356a91801a --- /dev/null +++ b/sklearn/utils/tests/test_typedefs.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest + +from sklearn.utils._typedefs import testing_make_array_from_typed_val + + +@pytest.mark.parametrize( + "type_t, value, expected_dtype", + [ + ("uint8_t", 1, np.uint8), + ("intp_t", 1, np.intp), + ("float64_t", 1.0, np.float64), + ("float32_t", 1.0, np.float32), + ("int32_t", 1, np.int32), + ("int64_t", 1, np.int64), + ], +) +def test_types(type_t, value, expected_dtype): + """Check that the types defined in _typedefs correspond to the expected + numpy dtypes. + """ + assert testing_make_array_from_typed_val[type_t](value).dtype == expected_dtype