diff --git a/sklearn/base.py b/sklearn/base.py index 8ec3dd84f8504..f7a3116f221c4 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -50,11 +50,12 @@ def clone(estimator, *, safe=True): Parameters ---------- - estimator : {list, tuple, set} of estimator objects or estimator object + estimator : {list, tuple, set} of estimator instances or a single \ + estimator instance The estimator or group of estimators to be cloned. safe : bool, default=True - If safe is false, clone will fall back to a deep copy on objects + If safe is False, clone will fall back to a deep copy on objects that are not estimators. """ @@ -146,7 +147,7 @@ def _pprint(params, offset=0, printer=repr): class BaseEstimator: - """Base class for all estimators in scikit-learn + """Base class for all estimators in scikit-learn. Notes ----- @@ -194,7 +195,7 @@ def get_params(self, deep=True): Returns ------- - params : mapping of string to any + params : dict Parameter names mapped to their values. """ out = dict() @@ -219,9 +220,9 @@ def set_params(self, **params): Set the parameters of this estimator. The method works on simple estimators as well as on nested objects - (such as pipelines). The latter have parameters of the form - ``__`` so that it's possible to update each - component of a nested object. + (such as :class:`~sklearn.pipeline.Pipeline`). The latter have + parameters of the form ``__`` so that it's + possible to update each component of a nested object. Parameters ---------- @@ -230,7 +231,7 @@ def set_params(self, **params): Returns ------- - self : object + self : estimator instance Estimator instance. """ if not params: @@ -481,7 +482,7 @@ def score(self, X, y, sample_weight=None): Test samples. y : array-like of shape (n_samples,) or (n_samples, n_outputs) - True labels for X. + True labels for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
@@ -489,7 +490,7 @@ def score(self, X, y, sample_weight=None): Returns ------- score : float - Mean accuracy of self.predict(X) wrt. y. + Mean accuracy of ``self.predict(X)`` wrt. `y`. """ from .metrics import accuracy_score return accuracy_score(y, self.predict(X), sample_weight=sample_weight) @@ -503,27 +504,28 @@ class RegressorMixin: _estimator_type = "regressor" def score(self, X, y, sample_weight=None): - """Return the coefficient of determination R^2 of the prediction. - - The coefficient R^2 is defined as (1 - u/v), where u is the residual - sum of squares ((y_true - y_pred) ** 2).sum() and v is the total - sum of squares ((y_true - y_true.mean()) ** 2).sum(). - The best possible score is 1.0 and it can be negative (because the - model can be arbitrarily worse). A constant model that always - predicts the expected value of y, disregarding the input features, - would get a R^2 score of 0.0. + """Return the coefficient of determination :math:`R^2` of the + prediction. + + The coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`, + where :math:`u` is the residual sum of squares ``((y_true - y_pred) + ** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true - + y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it + can be negative (because the model can be arbitrarily worse). A + constant model that always predicts the expected value of `y`, + disregarding the input features, would get a :math:`R^2` score of + 0.0. Parameters ---------- X : array-like of shape (n_samples, n_features) - Test samples. For some estimators this may be a - precomputed kernel matrix or a list of generic objects instead, - shape = (n_samples, n_samples_fitted), - where n_samples_fitted is the number of - samples used in the fitting for the estimator. + Test samples. 
For some estimators this may be a precomputed + kernel matrix or a list of generic objects instead with shape + ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` + is the number of samples used in the fitting for the estimator. y : array-like of shape (n_samples,) or (n_samples, n_outputs) - True values for X. + True values for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. @@ -531,11 +533,11 @@ def score(self, X, y, sample_weight=None): Returns ------- score : float - R^2 of self.predict(X) wrt. y. + :math:`R^2` of ``self.predict(X)`` wrt. `y`. Notes ----- - The R2 score used when calling ``score`` on a regressor uses + The :math:`R^2` score used when calling ``score`` on a regressor uses ``multioutput='uniform_average'`` from version 0.23 to keep consistent with default value of :func:`~sklearn.metrics.r2_score`. This influences the ``score`` method of all the multioutput @@ -557,7 +559,7 @@ class ClusterMixin: def fit_predict(self, X, y=None): """ - Perform clustering on X and returns cluster labels. + Perform clustering on `X` and return cluster labels. Parameters ---------- @@ -569,7 +571,7 @@ def fit_predict(self, X, y=None): Returns ------- - labels : ndarray of shape (n_samples,) + labels : ndarray of shape (n_samples,), dtype=np.int64 Cluster labels. """ # non-optimized default implementation; override when a better @@ -579,7 +581,7 @@ def fit_predict(self, X, y=None): class BiclusterMixin: - """Mixin class for all bicluster estimators in scikit-learn""" + """Mixin class for all bicluster estimators in scikit-learn.""" @property def biclusters_(self): @@ -590,7 +592,7 @@ def biclusters_(self): return self.rows_, self.columns_ def get_indices(self, i): - """Row and column indices of the i'th bicluster. + """Row and column indices of the `i`'th bicluster. Only works if ``rows_`` and ``columns_`` attributes exist. 
@@ -612,7 +614,7 @@ def get_indices(self, i): return np.nonzero(rows)[0], np.nonzero(columns)[0] def get_shape(self, i): - """Shape of the i'th bicluster. + """Shape of the `i`'th bicluster. Parameters ---------- @@ -621,8 +623,11 @@ def get_shape(self, i): Returns ------- - shape : tuple (int, int) - Number of rows and columns (resp.) in the bicluster. + n_rows : int + Number of rows in the bicluster. + + n_cols : int + Number of columns in the bicluster. """ indices = self.get_indices(i) return tuple(len(i) for i in indices) @@ -634,13 +639,13 @@ def get_submatrix(self, i, data): ---------- i : int The index of the cluster. - data : array-like + data : array-like of shape (n_samples, n_features) The data. Returns ------- - submatrix : ndarray - The submatrix corresponding to bicluster i. + submatrix : ndarray of shape (n_rows, n_cols) + The submatrix corresponding to bicluster `i`. Notes ----- @@ -660,16 +665,16 @@ def fit_transform(self, X, y=None, **fit_params): """ Fit to data, then transform it. - Fits transformer to X and y with optional parameters fit_params - and returns a transformed version of X. + Fits transformer to `X` and `y` with optional parameters `fit_params` + and returns a transformed version of `X`. Parameters ---------- - X : {array-like, sparse matrix, dataframe} of shape \ - (n_samples, n_features) + X : array-like of shape (n_samples, n_features) Input samples. - y : ndarray of shape (n_samples,), default=None + y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ + default=None Target values (None for unsupervised transformations). **fit_params : dict @@ -695,11 +700,12 @@ class DensityMixin: _estimator_type = "DensityEstimator" def score(self, X, y=None): - """Return the score of the model on the data X + """Return the score of the model on the data `X`. Parameters ---------- X : array-like of shape (n_samples, n_features) + Test samples. y : Ignored Not used, present for API consistency by convention. 
@@ -776,7 +782,7 @@ def is_regressor(estimator): Parameters ---------- - estimator : object + estimator : estimator instance Estimator object to test. Returns @@ -792,7 +798,7 @@ def is_outlier_detector(estimator): Parameters ---------- - estimator : object + estimator : estimator instance Estimator object to test. Returns