diff --git a/sklearn/cluster/_k_means.py b/sklearn/cluster/_k_means.py index 52f2b5fee4dac..f470d61423b2c 100644 --- a/sklearn/cluster/_k_means.py +++ b/sklearn/cluster/_k_means.py @@ -654,11 +654,12 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): Parameters ---------- - n_clusters : int, optional, default: 8 + n_clusters : int, default=8 The number of clusters to form as well as the number of centroids to generate. - init : {'k-means++', 'random' or an ndarray} + init : {'k-means++', 'random'} or ndarray of shape \ + (n_clusters, n_features), default='k-means++' Method for initialization, defaults to 'k-means++': 'k-means++' : selects initial cluster centers for k-mean @@ -671,19 +672,19 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): If an ndarray is passed, it should be of shape (n_clusters, n_features) and gives the initial centers. - n_init : int, default: 10 + n_init : int, default=10 Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. - max_iter : int, default: 300 + max_iter : int, default=300 Maximum number of iterations of the k-means algorithm for a single run. - tol : float, default: 1e-4 + tol : float, default=1e-4 Relative tolerance with regards to inertia to declare convergence. - precompute_distances : {'auto', True, False} + precompute_distances : 'auto' or bool, default='auto' Precompute distances (faster but takes more memory). 'auto' : do not precompute distances if n_samples * n_clusters > 12 @@ -694,15 +695,15 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): False : never precompute distances. - verbose : int, default 0 + verbose : int, default=0 Verbosity mode. - random_state : int, RandomState instance or None (default) + random_state : int, RandomState instance, default=None Determines random number generation for centroid initialization. 
Use an int to make the randomness deterministic. See :term:`Glossary <random_state>`. - copy_x : bool, optional + copy_x : bool, default=True When pre-computing distances it is more numerically accurate to center the data first. If copy_x is True (default), then the original data is not modified, ensuring X is C-contiguous. If False, the original data @@ -711,7 +712,7 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): the data mean, in this case it will also not ensure that data is C-contiguous which may cause a significant slowdown. - n_jobs : int or None, optional (default=None) + n_jobs : int, default=None The number of jobs to use for the computation. This works by computing each of the n_init runs in parallel. @@ -719,7 +720,7 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. - algorithm : "auto", "full" or "elkan", default="auto" + algorithm : {"auto", "full", "elkan"}, default="auto" K-means algorithm to use. The classical EM-style algorithm is "full". The "elkan" variation is more efficient by using the triangle inequality, but currently doesn't support sparse data. "auto" chooses @@ -727,12 +728,12 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator): Attributes ---------- - cluster_centers_ : array, [n_clusters, n_features] + cluster_centers_ : ndarray of shape (n_clusters, n_features) Coordinates of cluster centers. If the algorithm stops before fully converging (see ``tol`` and ``max_iter``), these will not be consistent with ``labels_``. - labels_ : array, shape (n_samples,) + labels_ : ndarray of shape (n_samples,) Labels of each point inertia_ : float