scikit-learn · lorentzenchr · Feb 9, 2023 · Feb 6, 2023 · Feb 9, 2023 · Feb 9, 2023
diff --git a/sklearn/cluster/_k_means_common.pxd b/sklearn/cluster/_k_means_common.pxd
@@ -1,19 +1,48 @@
 from cython cimport floating
 
 
-cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
+cdef floating _euclidean_dense_dense(  # Euclidean distance between a dense and b dense
+    const floating*,  # a (IN)
+    const floating*,  # b (IN)
+    int,              # n_features
+    bint              # squared: when true the result is returned without the final sqrt
+) nogil
 
-cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
-                                      floating, bint) nogil
+cdef floating _euclidean_sparse_dense(  # Euclidean distance between a sparse and b dense
+    const floating[::1],  # a_data (IN)
+    const int[::1],       # a_indices (IN)
+    const floating[::1],  # b (IN)
+    floating,             # b_squared_norm
+    bint                  # squared
+) nogil
 
 cpdef void _relocate_empty_clusters_dense(
-    floating[:, ::1], floating[::1], floating[:, ::1],
-    floating[:, ::1], floating[::1], int[::1])
+    const floating[:, ::1],  # X (IN)
+    const floating[::1],     # sample_weight (IN)
+    const floating[:, ::1],  # centers_old (IN)
+    floating[:, ::1],        # centers_new (INOUT)
+    floating[::1],           # weight_in_clusters (INOUT)
+    const int[::1]           # labels (IN)
+)
 
 cpdef void _relocate_empty_clusters_sparse(
-    floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
-    floating[:, ::1], floating[::1], int[::1])
+    const floating[::1],     # X_data (IN)
+    const int[::1],          # X_indices (IN)
+    const int[::1],          # X_indptr (IN)
+    const floating[::1],     # sample_weight (IN)
+    const floating[:, ::1],  # centers_old (IN)
+    floating[:, ::1],        # centers_new (INOUT)
+    floating[::1],           # weight_in_clusters (INOUT)
+    const int[::1]           # labels (IN)
+)
 
-cdef void _average_centers(floating[:, ::1], floating[::1])
+cdef void _average_centers(  # average new centers wrt weights
+    floating[:, ::1],    # centers (INOUT)
+    const floating[::1]  # weight_in_clusters (IN)
+)
 
-cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])
+cdef void _center_shift(  # compute shift between old and new centers
+    const floating[:, ::1],  # centers_old (IN)
+    const floating[:, ::1],  # centers_new (IN)
+    floating[::1]            # center_shift (OUT)
+)
diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx
@@ -21,10 +21,11 @@ CHUNK_SIZE = 256
 
 
 cdef floating _euclidean_dense_dense(
-        floating* a,  # IN
-        floating* b,  # IN
+        const floating* a,  # IN
+        const floating* b,  # IN
         int n_features,
-        bint squared) nogil:
+        bint squared
+) nogil:
     """Euclidean distance between a dense and b dense"""
     cdef:
         int i
@@ -46,18 +47,22 @@ cdef floating _euclidean_dense_dense(
     return result if squared else sqrt(result)
 
 
-def _euclidean_dense_dense_wrapper(floating[::1] a, floating[::1] b,
-                                   bint squared):
+def _euclidean_dense_dense_wrapper(
+    const floating[::1] a,  # IN; a.shape[0] is passed as the length for both a and b
+    const floating[::1] b,  # IN
+    bint squared
+):  # NOTE(review): &a[0] / &b[0] below presumably require non-empty inputs -- confirm
     """Wrapper of _euclidean_dense_dense for testing purpose"""
     return _euclidean_dense_dense(&a[0], &b[0], a.shape[0], squared)
 
 
 cdef floating _euclidean_sparse_dense(
-        floating[::1] a_data,  # IN
-        int[::1] a_indices,    # IN
-        floating[::1] b,       # IN
+        const floating[::1] a_data,  # IN
+        const int[::1] a_indices,    # IN
+        const floating[::1] b,       # IN
         floating b_squared_norm,
-        bint squared) nogil:
+        bint squared
+) nogil:
     """Euclidean distance between a sparse and b dense"""
     cdef:
         int nnz = a_indices.shape[0]
@@ -78,21 +83,22 @@ cdef floating _euclidean_sparse_dense(
 
 
 def _euclidean_sparse_dense_wrapper(
-        floating[::1] a_data,
-        int[::1] a_indices,
-        floating[::1] b,
+        const floating[::1] a_data,  # IN
+        const int[::1] a_indices,    # IN
+        const floating[::1] b,       # IN
         floating b_squared_norm,
-        bint squared):
+        bint squared
+):
     """Wrapper of _euclidean_sparse_dense for testing purpose"""
     return _euclidean_sparse_dense(
         a_data, a_indices, b, b_squared_norm, squared)
 
 
 cpdef floating _inertia_dense(
-        floating[:, ::1] X,           # IN READ-ONLY
-        floating[::1] sample_weight,  # IN READ-ONLY
-        floating[:, ::1] centers,     # IN
-        int[::1] labels,              # IN
+        const floating[:, ::1] X,           # IN
+        const floating[::1] sample_weight,  # IN
+        const floating[:, ::1] centers,     # IN
+        const int[::1] labels,              # IN
         int n_threads,
         int single_label=-1,
 ):
@@ -122,10 +128,10 @@ cpdef floating _inertia_dense(
 
 
 cpdef floating _inertia_sparse(
-        X,                            # IN
-        floating[::1] sample_weight,  # IN
-        floating[:, ::1] centers,     # IN
-        int[::1] labels,              # IN
+        X,                                  # IN
+        const floating[::1] sample_weight,  # IN
+        const floating[:, ::1] centers,     # IN
+        const int[::1] labels,              # IN
         int n_threads,
         int single_label=-1,
 ):
@@ -162,12 +168,13 @@ cpdef floating _inertia_sparse(
 
 
 cpdef void _relocate_empty_clusters_dense(
-        floating[:, ::1] X,                # IN READ-ONLY
-        floating[::1] sample_weight,       # IN READ-ONLY
-        floating[:, ::1] centers_old,      # IN
-        floating[:, ::1] centers_new,      # INOUT
-        floating[::1] weight_in_clusters,  # INOUT
-        int[::1] labels):                  # IN
+        const floating[:, ::1] X,            # IN
+        const floating[::1] sample_weight,   # IN
+        const floating[:, ::1] centers_old,  # IN
+        floating[:, ::1] centers_new,        # INOUT
+        floating[::1] weight_in_clusters,    # INOUT
+        const int[::1] labels                # IN
+):
     """Relocate centers which have no sample assigned to them."""
     cdef:
         int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
@@ -203,14 +210,15 @@ cpdef void _relocate_empty_clusters_dense(
 
 
 cpdef void _relocate_empty_clusters_sparse(
-        floating[::1] X_data,              # IN
-        int[::1] X_indices,                # IN
-        int[::1] X_indptr,                 # IN
-        floating[::1] sample_weight,       # IN
-        floating[:, ::1] centers_old,      # IN
-        floating[:, ::1] centers_new,      # INOUT
-        floating[::1] weight_in_clusters,  # INOUT
-        int[::1] labels):                  # IN
+        const floating[::1] X_data,          # IN
+        const int[::1] X_indices,            # IN
+        const int[::1] X_indptr,             # IN
+        const floating[::1] sample_weight,   # IN
+        const floating[:, ::1] centers_old,  # IN
+        floating[:, ::1] centers_new,        # INOUT
+        floating[::1] weight_in_clusters,    # INOUT
+        const int[::1] labels                # IN
+):
     """Relocate centers which have no sample assigned to them."""
     cdef:
         int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
@@ -257,8 +265,9 @@ cpdef void _relocate_empty_clusters_sparse(
 
 
 cdef void _average_centers(
-        floating[:, ::1] centers,           # INOUT
-        floating[::1] weight_in_clusters):  # IN
+        floating[:, ::1] centers,               # INOUT
+        const floating[::1] weight_in_clusters  # IN
+):
     """Average new centers wrt weights."""
     cdef:
         int n_clusters = centers.shape[0]
@@ -274,9 +283,10 @@ cdef void _average_centers(
 
 
 cdef void _center_shift(
-        floating[:, ::1] centers_old,  # IN
-        floating[:, ::1] centers_new,  # IN
-        floating[::1] center_shift):   # OUT
+        const floating[:, ::1] centers_old,  # IN
+        const floating[:, ::1] centers_new,  # IN
+        floating[::1] center_shift           # OUT
+):
     """Compute shift between old and new centers."""
     cdef:
         int n_clusters = centers_old.shape[0]
@@ -288,7 +298,11 @@ cdef void _center_shift(
             &centers_new[j, 0], &centers_old[j, 0], n_features, False)
 
 
-def _is_same_clustering(int[::1] labels1, int[::1] labels2, n_clusters):
+def _is_same_clustering(
+    const int[::1] labels1,
+    const int[::1] labels2,
+    n_clusters
+):
     """Check if two arrays of labels are the same up to a permutation of the labels"""
     cdef int[::1] mapping = np.full(fill_value=-1, shape=(n_clusters,), dtype=np.int32)
     cdef int i

diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx
@@ -24,12 +24,12 @@ from ._k_means_common cimport _center_shift
 
 
 def init_bounds_dense(
-        floating[:, ::1] X,                      # IN READ-ONLY
-        floating[:, ::1] centers,                # IN
-        floating[:, ::1] center_half_distances,  # IN
-        int[::1] labels,                         # OUT
-        floating[::1] upper_bounds,              # OUT
-        floating[:, ::1] lower_bounds,           # OUT
+        const floating[:, ::1] X,                      # IN
+        const floating[:, ::1] centers,                # IN
+        const floating[:, ::1] center_half_distances,  # IN
+        int[::1] labels,                               # OUT
+        floating[::1] upper_bounds,                    # OUT
+        floating[:, ::1] lower_bounds,                 # OUT
         int n_threads):
     """Initialize upper and lower bounds for each sample for dense input data.
 
@@ -100,12 +100,12 @@ def init_bounds_dense(
 
 
 def init_bounds_sparse(
-        X,                                       # IN
-        floating[:, ::1] centers,                # IN
-        floating[:, ::1] center_half_distances,  # IN
-        int[::1] labels,                         # OUT
-        floating[::1] upper_bounds,              # OUT
-        floating[:, ::1] lower_bounds,           # OUT
+        X,                                             # IN
+        const floating[:, ::1] centers,                # IN
+        const floating[:, ::1] center_half_distances,  # IN
+        int[::1] labels,                               # OUT
+        floating[::1] upper_bounds,                    # OUT
+        floating[:, ::1] lower_bounds,                 # OUT
         int n_threads):
     """Initialize upper and lower bounds for each sample for sparse input data.
 
@@ -187,17 +187,17 @@ def init_bounds_sparse(
 
 
 def elkan_iter_chunked_dense(
-        floating[:, ::1] X,                      # IN READ-ONLY
-        floating[::1] sample_weight,             # IN READ-ONLY
-        floating[:, ::1] centers_old,            # IN
-        floating[:, ::1] centers_new,            # OUT
-        floating[::1] weight_in_clusters,        # OUT
-        floating[:, ::1] center_half_distances,  # IN
-        floating[::1] distance_next_center,      # IN
-        floating[::1] upper_bounds,              # INOUT
-        floating[:, ::1] lower_bounds,           # INOUT
-        int[::1] labels,                         # INOUT
-        floating[::1] center_shift,              # OUT
+        const floating[:, ::1] X,                      # IN
+        const floating[::1] sample_weight,             # IN
+        const floating[:, ::1] centers_old,            # IN
+        floating[:, ::1] centers_new,                  # OUT
+        floating[::1] weight_in_clusters,              # OUT
+        const floating[:, ::1] center_half_distances,  # IN
+        const floating[::1] distance_next_center,      # IN
+        floating[::1] upper_bounds,                    # INOUT
+        floating[:, ::1] lower_bounds,                 # INOUT
+        int[::1] labels,                               # INOUT
+        floating[::1] center_shift,                    # OUT
         int n_threads,
         bint update_centers=True):
     """Single iteration of K-means Elkan algorithm with dense input.
@@ -350,16 +350,16 @@ def elkan_iter_chunked_dense(
 
 
 cdef void _update_chunk_dense(
-        floating[:, ::1] X,                      # IN READ-ONLY
-        floating[::1] sample_weight,             # IN READ-ONLY
-        floating[:, ::1] centers_old,            # IN
-        floating[:, ::1] center_half_distances,  # IN
-        floating[::1] distance_next_center,      # IN
-        int[::1] labels,                         # INOUT
-        floating[::1] upper_bounds,              # INOUT
-        floating[:, ::1] lower_bounds,           # INOUT
-        floating *centers_new,                   # OUT
-        floating *weight_in_clusters,            # OUT
+        const floating[:, ::1] X,                      # IN
+        const floating[::1] sample_weight,             # IN
+        const floating[:, ::1] centers_old,            # IN
+        const floating[:, ::1] center_half_distances,  # IN
+        const floating[::1] distance_next_center,      # IN
+        int[::1] labels,                               # INOUT
+        floating[::1] upper_bounds,                    # INOUT
+        floating[:, ::1] lower_bounds,                 # INOUT
+        floating *centers_new,                         # OUT
+        floating *weight_in_clusters,                  # OUT
         bint update_centers) nogil:
     """K-means combined EM step for one dense data chunk.
 
@@ -423,17 +423,17 @@ cdef void _update_chunk_dense(
 
 
 def elkan_iter_chunked_sparse(
-        X,                                       # IN
-        floating[::1] sample_weight,             # IN
-        floating[:, ::1] centers_old,            # IN
-        floating[:, ::1] centers_new,            # OUT
-        floating[::1] weight_in_clusters,        # OUT
-        floating[:, ::1] center_half_distances,  # IN
-        floating[::1] distance_next_center,      # IN
-        floating[::1] upper_bounds,              # INOUT
-        floating[:, ::1] lower_bounds,           # INOUT
-        int[::1] labels,                         # INOUT
-        floating[::1] center_shift,              # OUT
+        X,                                             # IN
+        const floating[::1] sample_weight,             # IN
+        const floating[:, ::1] centers_old,            # IN
+        floating[:, ::1] centers_new,                  # OUT
+        floating[::1] weight_in_clusters,              # OUT
+        const floating[:, ::1] center_half_distances,  # IN
+        const floating[::1] distance_next_center,      # IN
+        floating[::1] upper_bounds,                    # INOUT
+        floating[:, ::1] lower_bounds,                 # INOUT
+        int[::1] labels,                               # INOUT
+        floating[::1] center_shift,                    # OUT
         int n_threads,
         bint update_centers=True):
     """Single iteration of K-means Elkan algorithm with sparse input.
@@ -596,19 +596,19 @@ def elkan_iter_chunked_sparse(
 
 
 cdef void _update_chunk_sparse(
-        floating[::1] X_data,                    # IN
-        int[::1] X_indices,                      # IN
-        int[::1] X_indptr,                       # IN
-        floating[::1] sample_weight,             # IN
-        floating[:, ::1] centers_old,            # IN
-        floating[::1] centers_squared_norms,     # IN
-        floating[:, ::1] center_half_distances,  # IN
-        floating[::1] distance_next_center,      # IN
-        int[::1] labels,                         # INOUT
-        floating[::1] upper_bounds,              # INOUT
-        floating[:, ::1] lower_bounds,           # INOUT
-        floating *centers_new,                   # OUT
-        floating *weight_in_clusters,            # OUT
+        const floating[::1] X_data,                    # IN
+        const int[::1] X_indices,                      # IN
+        const int[::1] X_indptr,                       # IN
+        const floating[::1] sample_weight,             # IN
+        const floating[:, ::1] centers_old,            # IN
+        const floating[::1] centers_squared_norms,     # IN
+        const floating[:, ::1] center_half_distances,  # IN
+        const floating[::1] distance_next_center,      # IN
+        int[::1] labels,                               # INOUT
+        floating[::1] upper_bounds,                    # INOUT
+        floating[:, ::1] lower_bounds,                 # INOUT
+        floating *centers_new,                         # OUT
+        floating *weight_in_clusters,                  # OUT
         bint update_centers) nogil:
     """K-means combined EM step for one sparse data chunk.