diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 41465c1c98a5e..18d80f311e032 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -351,17 +351,22 @@ def _pairwise(self): return self.affinity == "precomputed" def fit(self, X, y=None): - """ Create affinity matrix from negative euclidean distances, then - apply affinity propagation clustering. + """Fit the clustering from features, or affinity matrix. Parameters ---------- - - X : array-like, shape (n_samples, n_features) or (n_samples, n_samples) - Data matrix or, if affinity is ``precomputed``, matrix of - similarities / affinities. + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + array-like, shape (n_samples, n_samples) + Training instances to cluster, or similarities / affinities between + instances if ``affinity='precomputed'``. If a sparse feature matrix + is provided, it will be converted into a sparse ``csr_matrix``. y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + self """ if self.affinity == "precomputed": @@ -394,13 +399,14 @@ def predict(self, X): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - New data to predict. + X : array-like or sparse matrix, shape (n_samples, n_features) + New data to predict. If a sparse matrix is provided, it will be + converted into a sparse ``csr_matrix``. Returns ------- - labels : array, shape (n_samples,) - Index of the cluster each sample belongs to. + labels : ndarray, shape (n_samples,) + Cluster labels. """ check_is_fitted(self, "cluster_centers_indices_") if not hasattr(self, "cluster_centers_"): @@ -414,3 +420,25 @@ def predict(self, X): "because affinity propagation did not converge. " "Labeling every sample as '-1'.", ConvergenceWarning) return np.array([-1] * X.shape[0]) + + def fit_predict(self, X, y=None): + """Fit the clustering from features or affinity matrix, and return + cluster labels. + + Parameters + ---------- + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + array-like, shape (n_samples, n_samples) + Training instances to cluster, or similarities / affinities between + instances if ``affinity='precomputed'``. If a sparse feature matrix + is provided, it will be converted into a sparse ``csr_matrix``. + + y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + labels : ndarray, shape (n_samples,) + Cluster labels. + """ + return super().fit_predict(X, y) diff --git a/sklearn/cluster/dbscan_.py b/sklearn/cluster/dbscan_.py index f586919577678..1696a9119bd63 100644 --- a/sklearn/cluster/dbscan_.py +++ b/sklearn/cluster/dbscan_.py @@ -329,21 +329,28 @@ def __init__(self, eps=0.5, min_samples=5, metric='euclidean', self.n_jobs = n_jobs def fit(self, X, y=None, sample_weight=None): - """Perform DBSCAN clustering from features or distance matrix. + """Perform DBSCAN clustering from features, or distance matrix. Parameters ---------- - X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \ - array of shape (n_samples, n_samples) - A feature array, or array of distances between samples if - ``metric='precomputed'``. + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + (n_samples, n_samples) + Training instances to cluster, or distances between instances if + ``metric='precomputed'``. If a sparse matrix is provided, it will + be converted into a sparse ``csr_matrix``. + sample_weight : array, shape (n_samples,), optional Weight of each sample, such that a sample with a weight of at least - ``min_samples`` is by itself a core sample; a sample with negative - weight may inhibit its eps-neighbor from being core. + ``min_samples`` is by itself a core sample; a sample with a + negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + self """ X = check_array(X, accept_sparse='csr') @@ -359,26 +366,30 @@ def fit(self, X, y=None, sample_weight=None): return self def fit_predict(self, X, y=None, sample_weight=None): - """Performs clustering on X and returns cluster labels. + """Perform DBSCAN clustering from features or distance matrix, + and return cluster labels. Parameters ---------- - X : array or sparse (CSR) matrix of shape (n_samples, n_features), or \ - array of shape (n_samples, n_samples) - A feature array, or array of distances between samples if - ``metric='precomputed'``. + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + (n_samples, n_samples) + Training instances to cluster, or distances between instances if + ``metric='precomputed'``. If a sparse matrix is provided, it will + be converted into a sparse ``csr_matrix``. + sample_weight : array, shape (n_samples,), optional Weight of each sample, such that a sample with a weight of at least - ``min_samples`` is by itself a core sample; a sample with negative - weight may inhibit its eps-neighbor from being core. + ``min_samples`` is by itself a core sample; a sample with a + negative weight may inhibit its eps-neighbor from being core. Note that weights are absolute, and default to 1. y : Ignored + Not used, present here for API consistency by convention. Returns ------- - y : ndarray, shape (n_samples,) - cluster labels + labels : ndarray, shape (n_samples,) + Cluster labels. Noisy samples are given the label -1. """ self.fit(X, sample_weight=sample_weight) return self.labels_ diff --git a/sklearn/cluster/hierarchical.py b/sklearn/cluster/hierarchical.py index 8f8857c1818cd..be39f1037e046 100644 --- a/sklearn/cluster/hierarchical.py +++ b/sklearn/cluster/hierarchical.py @@ -773,15 +773,16 @@ def n_components_(self): return self.n_connected_components_ def fit(self, X, y=None): - """Fit the hierarchical clustering on the data + """Fit the hierarchical clustering from features, or distance matrix. Parameters ---------- - X : array-like, shape = [n_samples, n_features] - Training data. Shape [n_samples, n_features], or [n_samples, - n_samples] if affinity=='precomputed'. + X : array-like, shape (n_samples, n_features) or (n_samples, n_samples) + Training instances to cluster, or distances between instances if + ``affinity='precomputed'``. y : Ignored + Not used, present here for API consistency by convention. Returns ------- @@ -875,6 +876,26 @@ def fit(self, X, y=None): self.labels_ = np.searchsorted(np.unique(labels), labels) return self + def fit_predict(self, X, y=None): + """Fit the hierarchical clustering from features or distance matrix, + and return cluster labels. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) or (n_samples, n_samples) + Training instances to cluster, or distances between instances if + ``affinity='precomputed'``. + + y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + labels : ndarray, shape (n_samples,) + Cluster labels. + """ + return super().fit_predict(X, y) + class FeatureAgglomeration(AgglomerativeClustering, AgglomerationTransform): """Agglomerate features. diff --git a/sklearn/cluster/spectral.py b/sklearn/cluster/spectral.py index e737a51efbd5f..6e6c8227d3ea0 100644 --- a/sklearn/cluster/spectral.py +++ b/sklearn/cluster/spectral.py @@ -448,16 +448,24 @@ def __init__(self, n_clusters=8, eigen_solver=None, n_components=None, self.n_jobs = n_jobs def fit(self, X, y=None): - """Creates an affinity matrix for X using the selected affinity, - then applies spectral clustering to this affinity matrix. + """Perform spectral clustering from features, or affinity matrix. Parameters ---------- - X : array-like or sparse matrix, shape (n_samples, n_features) - OR, if affinity==`precomputed`, a precomputed affinity - matrix of shape (n_samples, n_samples) + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + array-like, shape (n_samples, n_samples) + Training instances to cluster, or similarities / affinities between + instances if ``affinity='precomputed'``. If a sparse matrix is + provided in a format other than ``csr_matrix``, ``csc_matrix``, + or ``coo_matrix``, it will be converted into a sparse + ``csr_matrix``. y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + self """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], @@ -498,6 +506,30 @@ def fit(self, X, y=None): assign_labels=self.assign_labels) return self + def fit_predict(self, X, y=None): + """Perform spectral clustering from features, or affinity matrix, + and return cluster labels. + + Parameters + ---------- + X : array-like or sparse matrix, shape (n_samples, n_features), or \ + array-like, shape (n_samples, n_samples) + Training instances to cluster, or similarities / affinities between + instances if ``affinity='precomputed'``. If a sparse matrix is + provided in a format other than ``csr_matrix``, ``csc_matrix``, + or ``coo_matrix``, it will be converted into a sparse + ``csr_matrix``. + + y : Ignored + Not used, present here for API consistency by convention. + + Returns + ------- + labels : ndarray, shape (n_samples,) + Cluster labels. + """ + return super().fit_predict(X, y) + @property def _pairwise(self): return self.affinity == "precomputed"