Skip to content

Commit 15df842

Browse files
committed
pep8
1 parent f6fe230 commit 15df842

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

mlinsights/mlmodel/kmeans_l1.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,53 @@ def _labels_inertia_precompute_dense(norm, X, sample_weight, centers, distances)
8585
return labels, inertia
8686

8787

88+
def _assign_labels_csr(X, sample_weight, x_squared_norms, centers,
89+
labels, distances):
90+
"""Compute label assignment and inertia for a CSR input
91+
Return the inertia (sum of squared distances to the centers).
92+
"""
93+
X_indices = X.indices
94+
X_indptr = X.indptr
95+
n_clusters = centers.shape[0]
96+
n_features = centers.shape[1]
97+
n_samples = X.shape[0]
98+
store_distances = 0
99+
inertia = 0.0
100+
101+
if floating is float:
102+
center_squared_norms = numpy.zeros(n_clusters, dtype=np.float32)
103+
else:
104+
center_squared_norms = numpy.zeros(n_clusters, dtype=np.float64)
105+
106+
if n_samples == distances.shape[0]:
107+
store_distances = 1
108+
109+
for center_idx in range(n_clusters):
110+
center_squared_norms[center_idx] = numpy.dot(
111+
centers[center_idx, :], centers[center_idx, :])
112+
113+
for sample_idx in range(n_samples):
114+
min_dist = -1
115+
for center_idx in range(n_clusters):
116+
dist = 0.0
117+
# hardcoded: minimize euclidean distance to cluster center:
118+
# ||a - b||^2 = ||a||^2 + ||b||^2 -2 <a, b>
119+
for k in range(X_indptr[sample_idx], X_indptr[sample_idx + 1]):
120+
dist += centers[center_idx, X_indices[k]] * X[k]
121+
dist *= -2
122+
dist += center_squared_norms[center_idx]
123+
dist += x_squared_norms[sample_idx]
124+
dist *= sample_weight[sample_idx]
125+
if min_dist == -1 or dist < min_dist:
126+
min_dist = dist
127+
labels[sample_idx] = center_idx
128+
if store_distances:
129+
distances[sample_idx] = dist
130+
inertia += min_dist
131+
132+
return inertia
133+
134+
88135
def _labels_inertia_skl(X, sample_weight, x_squared_norms, centers,
89136
precompute_distances=True, distances=None):
90137
"""E step of the K-means EM algorithm.

0 commit comments

Comments
 (0)