Skip to content

[MRG][DOC] Fix inconsistencies in clustering doc. #13946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
13 commits merged on May 30, 2019
48 changes: 38 additions & 10 deletions sklearn/cluster/affinity_propagation_.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,17 +351,22 @@ def _pairwise(self):
return self.affinity == "precomputed"

def fit(self, X, y=None):
""" Create affinity matrix from negative euclidean distances, then
apply affinity propagation clustering.
"""Fit the clustering from features, or affinity matrix.

Parameters
----------

X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)
Data matrix or, if affinity is ``precomputed``, matrix of
similarities / affinities.
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse feature matrix
is provided, it will be converted into a sparse ``csr_matrix``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
self

"""
if self.affinity == "precomputed":
Expand Down Expand Up @@ -394,13 +399,14 @@ def predict(self, X):

Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
New data to predict.
X : array-like or sparse matrix, shape (n_samples, n_features)
New data to predict. If a sparse matrix is provided, it will be
converted into a sparse ``csr_matrix``.

Returns
-------
labels : array, shape (n_samples,)
Index of the cluster each sample belongs to.
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
check_is_fitted(self, "cluster_centers_indices_")
if not hasattr(self, "cluster_centers_"):
Expand All @@ -414,3 +420,25 @@ def predict(self, X):
"because affinity propagation did not converge. "
"Labeling every sample as '-1'.", ConvergenceWarning)
return np.array([-1] * X.shape[0])

def fit_predict(self, X, y=None):
"""Fit the clustering from features or affinity matrix, and return
cluster labels.

Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse feature matrix
is provided, it will be converted into a sparse ``csr_matrix``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
return super().fit_predict(X, y)
43 changes: 27 additions & 16 deletions sklearn/cluster/dbscan_.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,21 +329,28 @@ def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
self.n_jobs = n_jobs

def fit(self, X, y=None, sample_weight=None):
"""Perform DBSCAN clustering from features or distance matrix.
"""Perform DBSCAN clustering from features, or distance matrix.

Parameters
----------
X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \
array of shape (n_samples, n_samples)
A feature array, or array of distances between samples if
``metric='precomputed'``.
X : array-like or sparse matrix, shape (n_samples, n_features), or \
(n_samples, n_samples)
Training instances to cluster, or distances between instances if
``metric='precomputed'``. If a sparse matrix is provided, it will
be converted into a sparse ``csr_matrix``.

sample_weight : array, shape (n_samples,), optional
Weight of each sample, such that a sample with a weight of at least
``min_samples`` is by itself a core sample; a sample with negative
weight may inhibit its eps-neighbor from being core.
``min_samples`` is by itself a core sample; a sample with a
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
self

"""
X = check_array(X, accept_sparse='csr')
Expand All @@ -359,26 +366,30 @@ def fit(self, X, y=None, sample_weight=None):
return self

def fit_predict(self, X, y=None, sample_weight=None):
"""Performs clustering on X and returns cluster labels.
"""Perform DBSCAN clustering from features or distance matrix,
and return cluster labels.

Parameters
----------
X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \
array of shape (n_samples, n_samples)
A feature array, or array of distances between samples if
``metric='precomputed'``.
X : array-like or sparse matrix, shape (n_samples, n_features), or \
(n_samples, n_samples)
Training instances to cluster, or distances between instances if
``metric='precomputed'``. If a sparse matrix is provided, it will
be converted into a sparse ``csr_matrix``.

sample_weight : array, shape (n_samples,), optional
Weight of each sample, such that a sample with a weight of at least
``min_samples`` is by itself a core sample; a sample with negative
weight may inhibit its eps-neighbor from being core.
``min_samples`` is by itself a core sample; a sample with a
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
y : ndarray, shape (n_samples,)
cluster labels
labels : ndarray, shape (n_samples,)
Cluster labels. Noisy samples are given the label -1.
"""
self.fit(X, sample_weight=sample_weight)
return self.labels_
29 changes: 25 additions & 4 deletions sklearn/cluster/hierarchical.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,15 +773,16 @@ def n_components_(self):
return self.n_connected_components_

def fit(self, X, y=None):
"""Fit the hierarchical clustering on the data
"""Fit the hierarchical clustering from features, or distance matrix.

Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training data. Shape [n_samples, n_features], or [n_samples,
n_samples] if affinity=='precomputed'.
X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)
Training instances to cluster, or distances between instances if
``affinity='precomputed'``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
Expand Down Expand Up @@ -875,6 +876,26 @@ def fit(self, X, y=None):
self.labels_ = np.searchsorted(np.unique(labels), labels)
return self

def fit_predict(self, X, y=None):
"""Fit the hierarchical clustering from features or distance matrix,
and return cluster labels.

Parameters
----------
X : array-like, shape (n_samples, n_features) or (n_samples, n_samples)
Training instances to cluster, or distances between instances if
``affinity='precomputed'``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
return super().fit_predict(X, y)


class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform):
"""Agglomerate features.
Expand Down
42 changes: 37 additions & 5 deletions sklearn/cluster/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,16 +448,24 @@ def __init__(self, n_clusters=8, eigen_solver=None, n_components=None,
self.n_jobs = n_jobs

def fit(self, X, y=None):
"""Creates an affinity matrix for X using the selected affinity,
then applies spectral clustering to this affinity matrix.
"""Perform spectral clustering from features, or affinity matrix.

Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features)
OR, if affinity==`precomputed`, a precomputed affinity
matrix of shape (n_samples, n_samples)
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse matrix is
provided in a format other than ``csr_matrix``, ``csc_matrix``,
or ``coo_matrix``, it will be converted into a sparse
``csr_matrix``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
self

"""
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
Expand Down Expand Up @@ -498,6 +506,30 @@ def fit(self, X, y=None):
assign_labels=self.assign_labels)
return self

def fit_predict(self, X, y=None):
"""Perform spectral clustering from features, or affinity matrix,
and return cluster labels.

Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse matrix is
provided in a format other than ``csr_matrix``, ``csc_matrix``,
or ``coo_matrix``, it will be converted into a sparse
``csr_matrix``.

y : Ignored
Not used, present here for API consistency by convention.

Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
return super().fit_predict(X, y)

@property
def _pairwise(self):
return self.affinity == "precomputed"