Skip to content

MAINT Consistent cython types from _typedefs #25942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 29, 2023
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ def check_package_status(package, min_version):
{
"sources": ["_middle_term_computer.pyx.tp", "_middle_term_computer.pxd.tp"],
"language": "c++",
"include_np": True,
"extra_compile_args": ["-std=c++11"],
},
{
Expand Down Expand Up @@ -397,9 +396,9 @@ def check_package_status(package, min_version):
},
{"sources": ["_random.pyx"], "include_np": True},
{"sources": ["_logistic_sigmoid.pyx"], "include_np": True},
{"sources": ["_typedefs.pyx"], "include_np": True},
{"sources": ["_heap.pyx"], "include_np": True},
{"sources": ["_sorting.pyx"], "include_np": True},
{"sources": ["_typedefs.pyx"]},
{"sources": ["_heap.pyx"]},
{"sources": ["_sorting.pyx"]},
{"sources": ["_vector_sentinel.pyx"], "language": "c++", "include_np": True},
{"sources": ["_isfinite.pyx"]},
],
Expand Down
6 changes: 3 additions & 3 deletions sklearn/cluster/_hierarchical_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ cimport cython

from ..metrics._dist_metrics cimport DistanceMetric
from ..utils._fast_dict cimport IntFloatDict
from ..utils._typedefs cimport float64_t, intp_t, bool_t
from ..utils._typedefs cimport float64_t, intp_t, uint8_t

# C++
from cython.operator cimport dereference as deref, preincrement as inc
Expand Down Expand Up @@ -119,7 +119,7 @@ def _get_parents(
nodes,
heads,
const intp_t[:] parents,
bool_t[::1] not_visited
uint8_t[::1] not_visited
):
"""Returns the heads of the given nodes, as defined by parents.

Expand Down Expand Up @@ -465,7 +465,7 @@ def mst_linkage_core(
"""
cdef:
intp_t n_samples = raw_data.shape[0]
bool_t[:] in_tree = np.zeros(n_samples, dtype=bool)
uint8_t[:] in_tree = np.zeros(n_samples, dtype=bool)
float64_t[:, ::1] result = np.zeros((n_samples - 1, 3))

intp_t current_node = 0
Expand Down
113 changes: 54 additions & 59 deletions sklearn/metrics/_dist_metrics.pxd.tp
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,21 @@ implementation_specific_values = [
# for the float64 case as to still be able to expose the original
# float64 implementation under the same API, namely `DistanceMetric`.
#
# On the other hand, '32' bit is used for `name_suffix` for the float32
# On the other hand, '32' is used for `name_suffix` for the float32
# case to remove ambiguity and use `DistanceMetric32`, which is not
# publicly exposed.
#
# The metric mapping is adapted accordingly to route to the correct
# implementations.
#
# We also use 64bit types as defined in `sklearn.utils._typedefs`
# to maintain backward compatibility at the symbol level for extra
# safety.
#
('', 'DTYPE_t', 'DTYPE'),
('32', 'cnp.float32_t', 'np.float32')
('', 'float64_t', 'np.float64'),
('32', 'float32_t', 'np.float32')
]

}}
cimport numpy as cnp
from libc.math cimport sqrt, exp

from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
from ..utils._typedefs cimport float64_t, float32_t, int32_t, intp_t

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

Expand All @@ -37,37 +32,37 @@ from ..utils._typedefs cimport DTYPE_t, ITYPE_t, SPARSE_INDEX_TYPE_t
#
# We use these for the default (euclidean) case so that they can be
# inlined. This leads to faster computation for the most common case
cdef inline DTYPE_t euclidean_dist{{name_suffix}}(
cdef inline float64_t euclidean_dist{{name_suffix}}(
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
intp_t size,
) except -1 nogil:
cdef DTYPE_t tmp, d=0
cdef cnp.intp_t j
cdef float64_t tmp, d=0
cdef intp_t j
for j in range(size):
tmp = <DTYPE_t> (x1[j] - x2[j])
tmp = <float64_t> (x1[j] - x2[j])
d += tmp * tmp
return sqrt(d)


cdef inline DTYPE_t euclidean_rdist{{name_suffix}}(
cdef inline float64_t euclidean_rdist{{name_suffix}}(
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
intp_t size,
) except -1 nogil:
cdef DTYPE_t tmp, d=0
cdef cnp.intp_t j
cdef float64_t tmp, d=0
cdef intp_t j
for j in range(size):
tmp = <DTYPE_t>(x1[j] - x2[j])
tmp = <float64_t>(x1[j] - x2[j])
d += tmp * tmp
return d


cdef inline DTYPE_t euclidean_dist_to_rdist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil:
cdef inline float64_t euclidean_dist_to_rdist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil:
return dist * dist


cdef inline DTYPE_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil:
cdef inline float64_t euclidean_rdist_to_dist{{name_suffix}}(const {{INPUT_DTYPE_t}} dist) except -1 nogil:
return sqrt(dist)


Expand All @@ -78,89 +73,89 @@ cdef class DistanceMetric{{name_suffix}}:
# we must define them here so that cython's limited polymorphism will work.
# Because we don't expect to instantiate a lot of these objects, the
# extra memory overhead of this setup should not be an issue.
cdef DTYPE_t p
cdef const DTYPE_t[::1] vec
cdef const DTYPE_t[:, ::1] mat
cdef ITYPE_t size
cdef float64_t p
cdef const float64_t[::1] vec
cdef const float64_t[:, ::1] mat
cdef intp_t size
cdef object func
cdef object kwargs

cdef DTYPE_t dist(
cdef float64_t dist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
intp_t size,
) except -1 nogil

cdef DTYPE_t rdist(
cdef float64_t rdist(
self,
const {{INPUT_DTYPE_t}}* x1,
const {{INPUT_DTYPE_t}}* x2,
ITYPE_t size,
intp_t size,
) except -1 nogil

cdef DTYPE_t dist_csr(
cdef float64_t dist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const int32_t[:] x1_indices,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
const int32_t[:] x2_indices,
const int32_t x1_start,
const int32_t x1_end,
const int32_t x2_start,
const int32_t x2_end,
const intp_t size,
) except -1 nogil

cdef DTYPE_t rdist_csr(
cdef float64_t rdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const int32_t[:] x1_indices,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t x1_start,
const SPARSE_INDEX_TYPE_t x1_end,
const SPARSE_INDEX_TYPE_t x2_start,
const SPARSE_INDEX_TYPE_t x2_end,
const ITYPE_t size,
const int32_t[:] x2_indices,
const int32_t x1_start,
const int32_t x1_end,
const int32_t x2_start,
const int32_t x2_end,
const intp_t size,
) except -1 nogil

cdef int pdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
DTYPE_t[:, ::1] D,
float64_t[:, ::1] D,
) except -1

cdef int cdist(
self,
const {{INPUT_DTYPE_t}}[:, ::1] X,
const {{INPUT_DTYPE_t}}[:, ::1] Y,
DTYPE_t[:, ::1] D,
float64_t[:, ::1] D,
) except -1

cdef int pdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
const int32_t[:] x1_indices,
const int32_t[:] x1_indptr,
const intp_t size,
float64_t[:, ::1] D,
) except -1 nogil

cdef int cdist_csr(
self,
const {{INPUT_DTYPE_t}}* x1_data,
const SPARSE_INDEX_TYPE_t[:] x1_indices,
const SPARSE_INDEX_TYPE_t[:] x1_indptr,
const int32_t[:] x1_indices,
const int32_t[:] x1_indptr,
const {{INPUT_DTYPE_t}}* x2_data,
const SPARSE_INDEX_TYPE_t[:] x2_indices,
const SPARSE_INDEX_TYPE_t[:] x2_indptr,
const ITYPE_t size,
DTYPE_t[:, ::1] D,
const int32_t[:] x2_indices,
const int32_t[:] x2_indptr,
const intp_t size,
float64_t[:, ::1] D,
) except -1 nogil

cdef DTYPE_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil
cdef float64_t _rdist_to_dist(self, {{INPUT_DTYPE_t}} rdist) except -1 nogil

cdef DTYPE_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil
cdef float64_t _dist_to_rdist(self, {{INPUT_DTYPE_t}} dist) except -1 nogil

{{endfor}}
Loading