@@ -85,6 +85,53 @@ def _labels_inertia_precompute_dense(norm, X, sample_weight, centers, distances)
85
85
return labels , inertia
86
86
87
87
88
+ def _assign_labels_csr (X , sample_weight , x_squared_norms , centers ,
89
+ labels , distances ):
90
+ """Compute label assignment and inertia for a CSR input
91
+ Return the inertia (sum of squared distances to the centers).
92
+ """
93
+ X_indices = X .indices
94
+ X_indptr = X .indptr
95
+ n_clusters = centers .shape [0 ]
96
+ n_features = centers .shape [1 ]
97
+ n_samples = X .shape [0 ]
98
+ store_distances = 0
99
+ inertia = 0.0
100
+
101
+ if floating is float :
102
+ center_squared_norms = numpy .zeros (n_clusters , dtype = np .float32 )
103
+ else :
104
+ center_squared_norms = numpy .zeros (n_clusters , dtype = np .float64 )
105
+
106
+ if n_samples == distances .shape [0 ]:
107
+ store_distances = 1
108
+
109
+ for center_idx in range (n_clusters ):
110
+ center_squared_norms [center_idx ] = numpy .dot (
111
+ centers [center_idx , :], centers [center_idx , :])
112
+
113
+ for sample_idx in range (n_samples ):
114
+ min_dist = - 1
115
+ for center_idx in range (n_clusters ):
116
+ dist = 0.0
117
+ # hardcoded: minimize euclidean distance to cluster center:
118
+ # ||a - b||^2 = ||a||^2 + ||b||^2 -2 <a, b>
119
+ for k in range (X_indptr [sample_idx ], X_indptr [sample_idx + 1 ]):
120
+ dist += centers [center_idx , X_indices [k ]] * X [k ]
121
+ dist *= - 2
122
+ dist += center_squared_norms [center_idx ]
123
+ dist += x_squared_norms [sample_idx ]
124
+ dist *= sample_weight [sample_idx ]
125
+ if min_dist == - 1 or dist < min_dist :
126
+ min_dist = dist
127
+ labels [sample_idx ] = center_idx
128
+ if store_distances :
129
+ distances [sample_idx ] = dist
130
+ inertia += min_dist
131
+
132
+ return inertia
133
+
134
+
88
135
def _labels_inertia_skl (X , sample_weight , x_squared_norms , centers ,
89
136
precompute_distances = True , distances = None ):
90
137
"""E step of the K-means EM algorithm.
0 commit comments