Skip to content

Commit d5715fb

Browse files
authored
ENH use np.cumsum instead of stable_cumsum in kmeans++ (scikit-learn#31991)
1 parent 492e1ec commit d5715fb

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
+- :func:`cluster.kmeans_plusplus` now uses `np.cumsum` directly without extra
+  numerical stability checks and without casting to `np.float64`.
+  By :user:`Tiziano Zito <otizonaizit>`

sklearn/cluster/_kmeans.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
 from sklearn.utils import check_array, check_random_state
 from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
 from sklearn.utils._param_validation import Interval, StrOptions, validate_params
-from sklearn.utils.extmath import row_norms, stable_cumsum
+from sklearn.utils.extmath import row_norms
 from sklearn.utils.parallel import (
     _get_threadpool_controller,
     _threadpool_controller_decorator,
@@ -248,7 +248,7 @@ def _kmeans_plusplus(
         # to the squared distance to the closest existing center
         rand_vals = random_state.uniform(size=n_local_trials) * current_pot
         candidate_ids = np.searchsorted(
-            stable_cumsum(sample_weight * closest_dist_sq), rand_vals
+            np.cumsum(sample_weight * closest_dist_sq), rand_vals
         )
         # XXX: numerical imprecision can result in a candidate_id out of range
         np.clip(candidate_ids, None, closest_dist_sq.size - 1, out=candidate_ids)

0 commit comments

Comments
 (0)