Skip to content

MAINT Remove ReadonlyArrayWrapper from _kmeans #25554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions sklearn/cluster/_k_means_common.pxd
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
from cython cimport floating


# Euclidean distance between two dense vectors passed as raw pointers.
# Arguments, in order (names as used by the .pyx definition):
#   a (IN), b (IN), n_features, squared.
# When `squared` is true the accumulated result is returned as is;
# otherwise its square root is returned.
cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
cdef floating _euclidean_dense_dense(
const floating*,
const floating*,
int,
bint
) nogil

# Euclidean distance between a sparse vector `a` and a dense vector `b`.
# Arguments, in order (names as used by the .pyx definition):
#   a_data (IN), a_indices (IN), b (IN), b_squared_norm, squared.
cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
floating, bint) nogil
cdef floating _euclidean_sparse_dense(
const floating[::1],
const int[::1],
const floating[::1],
floating,
bint
) nogil

# Relocate centers which have no sample assigned to them (dense input).
# Arguments, in order (names and roles as annotated in the .pyx definition):
#   X (IN), sample_weight (IN), centers_old (IN),
#   centers_new (INOUT), weight_in_clusters (INOUT), labels (IN).
# In the const-qualified form, exactly the IN arguments are const.
cpdef void _relocate_empty_clusters_dense(
floating[:, ::1], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
const floating[:, ::1],
const floating[::1],
const floating[:, ::1],
floating[:, ::1],
floating[::1],
const int[::1]
)

# Relocate centers which have no sample assigned to them (sparse input,
# passed as separate data / indices / indptr arrays).
# Arguments, in order (names and roles as annotated in the .pyx definition):
#   X_data (IN), X_indices (IN), X_indptr (IN), sample_weight (IN),
#   centers_old (IN), centers_new (INOUT), weight_in_clusters (INOUT),
#   labels (IN).
# In the const-qualified form, exactly the IN arguments are const.
cpdef void _relocate_empty_clusters_sparse(
floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
const floating[::1],
const int[::1],
const int[::1],
const floating[::1],
const floating[:, ::1],
floating[:, ::1],
floating[::1],
const int[::1]
)

# Average new centers with respect to the cluster weights.
# Arguments, in order (names and roles as annotated in the .pyx definition):
#   centers (INOUT), weight_in_clusters (IN).
cdef void _average_centers(floating[:, ::1], floating[::1])
cdef void _average_centers(
floating[:, ::1],
const floating[::1]
)

# Compute the shift between old and new centers.
# Arguments, in order (names and roles as annotated in the .pyx definition):
#   centers_old (IN), centers_new (IN), center_shift (OUT).
cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])
cdef void _center_shift(
const floating[:, ::1],
const floating[:, ::1],
floating[::1]
)
96 changes: 55 additions & 41 deletions sklearn/cluster/_k_means_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ CHUNK_SIZE = 256


cdef floating _euclidean_dense_dense(
floating* a, # IN
floating* b, # IN
const floating* a, # IN
const floating* b, # IN
int n_features,
bint squared) nogil:
bint squared
) nogil:
"""Euclidean distance between a dense and b dense"""
cdef:
int i
Expand All @@ -46,18 +47,22 @@ cdef floating _euclidean_dense_dense(
return result if squared else sqrt(result)


def _euclidean_dense_dense_wrapper(floating[::1] a, floating[::1] b,
bint squared):
def _euclidean_dense_dense_wrapper(
const floating[::1] a,
const floating[::1] b,
bint squared
):
"""Python-callable wrapper of _euclidean_dense_dense, for testing purposes.

Passes pointers to the first elements of `a` and `b`, uses `a.shape[0]`
as the number of features, and forwards `squared` unchanged.
"""
return _euclidean_dense_dense(&a[0], &b[0], a.shape[0], squared)


cdef floating _euclidean_sparse_dense(
floating[::1] a_data, # IN
int[::1] a_indices, # IN
floating[::1] b, # IN
const floating[::1] a_data, # IN
const int[::1] a_indices, # IN
const floating[::1] b, # IN
floating b_squared_norm,
bint squared) nogil:
bint squared
) nogil:
"""Euclidean distance between a sparse and b dense"""
cdef:
int nnz = a_indices.shape[0]
Expand All @@ -78,21 +83,22 @@ cdef floating _euclidean_sparse_dense(


def _euclidean_sparse_dense_wrapper(
floating[::1] a_data,
int[::1] a_indices,
floating[::1] b,
const floating[::1] a_data,
const int[::1] a_indices,
const floating[::1] b,
floating b_squared_norm,
bint squared):
bint squared
):
"""Python-callable wrapper of _euclidean_sparse_dense, for testing purposes.

All arguments are forwarded unchanged, in the same order.
"""
return _euclidean_sparse_dense(
a_data, a_indices, b, b_squared_norm, squared)


cpdef floating _inertia_dense(
floating[:, ::1] X, # IN READ-ONLY
floating[::1] sample_weight, # IN READ-ONLY
floating[:, ::1] centers, # IN
int[::1] labels, # IN
const floating[:, ::1] X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers, # IN
const int[::1] labels, # IN
int n_threads,
int single_label=-1,
):
Expand Down Expand Up @@ -122,10 +128,10 @@ cpdef floating _inertia_dense(


cpdef floating _inertia_sparse(
X, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers, # IN
int[::1] labels, # IN
X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers, # IN
const int[::1] labels, # IN
int n_threads,
int single_label=-1,
):
Expand Down Expand Up @@ -162,12 +168,13 @@ cpdef floating _inertia_sparse(


cpdef void _relocate_empty_clusters_dense(
floating[:, ::1] X, # IN READ-ONLY
floating[::1] sample_weight, # IN READ-ONLY
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
int[::1] labels): # IN
const floating[:, ::1] X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
const int[::1] labels # IN
):
"""Relocate centers which have no sample assigned to them."""
cdef:
int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
Expand Down Expand Up @@ -203,14 +210,15 @@ cpdef void _relocate_empty_clusters_dense(


cpdef void _relocate_empty_clusters_sparse(
floating[::1] X_data, # IN
int[::1] X_indices, # IN
int[::1] X_indptr, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
int[::1] labels): # IN
const floating[::1] X_data, # IN
const int[::1] X_indices, # IN
const int[::1] X_indptr, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
const int[::1] labels # IN
):
"""Relocate centers which have no sample assigned to them."""
cdef:
int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
Expand Down Expand Up @@ -257,8 +265,9 @@ cpdef void _relocate_empty_clusters_sparse(


cdef void _average_centers(
floating[:, ::1] centers, # INOUT
floating[::1] weight_in_clusters): # IN
floating[:, ::1] centers, # INOUT
const floating[::1] weight_in_clusters # IN
):
"""Average new centers wrt weights."""
cdef:
int n_clusters = centers.shape[0]
Expand All @@ -274,9 +283,10 @@ cdef void _average_centers(


cdef void _center_shift(
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # IN
floating[::1] center_shift): # OUT
const floating[:, ::1] centers_old, # IN
const floating[:, ::1] centers_new, # IN
floating[::1] center_shift # OUT
):
"""Compute shift between old and new centers."""
cdef:
int n_clusters = centers_old.shape[0]
Expand All @@ -288,7 +298,11 @@ cdef void _center_shift(
&centers_new[j, 0], &centers_old[j, 0], n_features, False)


def _is_same_clustering(int[::1] labels1, int[::1] labels2, n_clusters):
def _is_same_clustering(
const int[::1] labels1,
const int[::1] labels2,
n_clusters
):
"""Check if two arrays of labels are the same up to a permutation of the labels"""
cdef int[::1] mapping = np.full(fill_value=-1, shape=(n_clusters,), dtype=np.int32)
cdef int i
Expand Down
114 changes: 57 additions & 57 deletions sklearn/cluster/_k_means_elkan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ from ._k_means_common cimport _center_shift


def init_bounds_dense(
floating[:, ::1] X, # IN READ-ONLY
floating[:, ::1] centers, # IN
floating[:, ::1] center_half_distances, # IN
int[::1] labels, # OUT
floating[::1] upper_bounds, # OUT
floating[:, ::1] lower_bounds, # OUT
const floating[:, ::1] X, # IN
const floating[:, ::1] centers, # IN
const floating[:, ::1] center_half_distances, # IN
int[::1] labels, # OUT
floating[::1] upper_bounds, # OUT
floating[:, ::1] lower_bounds, # OUT
int n_threads):
"""Initialize upper and lower bounds for each sample for dense input data.

Expand Down Expand Up @@ -100,12 +100,12 @@ def init_bounds_dense(


def init_bounds_sparse(
X, # IN
floating[:, ::1] centers, # IN
floating[:, ::1] center_half_distances, # IN
int[::1] labels, # OUT
floating[::1] upper_bounds, # OUT
floating[:, ::1] lower_bounds, # OUT
X, # IN
const floating[:, ::1] centers, # IN
const floating[:, ::1] center_half_distances, # IN
int[::1] labels, # OUT
floating[::1] upper_bounds, # OUT
floating[:, ::1] lower_bounds, # OUT
int n_threads):
"""Initialize upper and lower bounds for each sample for sparse input data.

Expand Down Expand Up @@ -187,17 +187,17 @@ def init_bounds_sparse(


def elkan_iter_chunked_dense(
floating[:, ::1] X, # IN READ-ONLY
floating[::1] sample_weight, # IN READ-ONLY
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
floating[:, ::1] center_half_distances, # IN
floating[::1] distance_next_center, # IN
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
int[::1] labels, # INOUT
floating[::1] center_shift, # OUT
const floating[:, ::1] X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
const floating[:, ::1] center_half_distances, # IN
const floating[::1] distance_next_center, # IN
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
int[::1] labels, # INOUT
floating[::1] center_shift, # OUT
int n_threads,
bint update_centers=True):
"""Single iteration of K-means Elkan algorithm with dense input.
Expand Down Expand Up @@ -350,16 +350,16 @@ def elkan_iter_chunked_dense(


cdef void _update_chunk_dense(
floating[:, ::1] X, # IN READ-ONLY
floating[::1] sample_weight, # IN READ-ONLY
floating[:, ::1] centers_old, # IN
floating[:, ::1] center_half_distances, # IN
floating[::1] distance_next_center, # IN
int[::1] labels, # INOUT
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
const floating[:, ::1] X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
const floating[:, ::1] center_half_distances, # IN
const floating[::1] distance_next_center, # IN
int[::1] labels, # INOUT
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
bint update_centers) nogil:
"""K-means combined EM step for one dense data chunk.

Expand Down Expand Up @@ -423,17 +423,17 @@ cdef void _update_chunk_dense(


def elkan_iter_chunked_sparse(
X, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
floating[:, ::1] center_half_distances, # IN
floating[::1] distance_next_center, # IN
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
int[::1] labels, # INOUT
floating[::1] center_shift, # OUT
X, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
const floating[:, ::1] center_half_distances, # IN
const floating[::1] distance_next_center, # IN
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
int[::1] labels, # INOUT
floating[::1] center_shift, # OUT
int n_threads,
bint update_centers=True):
"""Single iteration of K-means Elkan algorithm with sparse input.
Expand Down Expand Up @@ -596,19 +596,19 @@ def elkan_iter_chunked_sparse(


cdef void _update_chunk_sparse(
floating[::1] X_data, # IN
int[::1] X_indices, # IN
int[::1] X_indptr, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers_old, # IN
floating[::1] centers_squared_norms, # IN
floating[:, ::1] center_half_distances, # IN
floating[::1] distance_next_center, # IN
int[::1] labels, # INOUT
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
const floating[::1] X_data, # IN
const int[::1] X_indices, # IN
const int[::1] X_indptr, # IN
const floating[::1] sample_weight, # IN
const floating[:, ::1] centers_old, # IN
const floating[::1] centers_squared_norms, # IN
const floating[:, ::1] center_half_distances, # IN
const floating[::1] distance_next_center, # IN
int[::1] labels, # INOUT
floating[::1] upper_bounds, # INOUT
floating[:, ::1] lower_bounds, # INOUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
bint update_centers) nogil:
"""K-means combined EM step for one sparse data chunk.

Expand Down
Loading