From 6d7236e9e5ad395459bb588dc85448015c7c431c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Tue, 9 Jun 2020 19:35:27 +0200 Subject: [PATCH 1/4] DOC Fix documentation of the base module --- sklearn/base.py | 56 +++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 46398baabfd3a..318dd1f1d7936 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -146,7 +146,7 @@ def _pprint(params, offset=0, printer=repr): class BaseEstimator: - """Base class for all estimators in scikit-learn + """Base class for all estimators in scikit-learn. Notes ----- @@ -194,7 +194,7 @@ def get_params(self, deep=True): Returns ------- - params : mapping of string to any + params : dict Parameter names mapped to their values. """ out = dict() @@ -230,7 +230,7 @@ def set_params(self, **params): Returns ------- - self : object + self : BaseEstimator Estimator instance. """ if not params: @@ -481,7 +481,7 @@ def score(self, X, y, sample_weight=None): Test samples. y : array-like of shape (n_samples,) or (n_samples, n_outputs) - True labels for X. + True labels for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. @@ -489,7 +489,7 @@ def score(self, X, y, sample_weight=None): Returns ------- score : float - Mean accuracy of self.predict(X) wrt. y. + Mean accuracy of ``self.predict(X)`` wrt. `y`. """ from .metrics import accuracy_score return accuracy_score(y, self.predict(X), sample_weight=sample_weight) @@ -503,27 +503,28 @@ class RegressorMixin: _estimator_type = "regressor" def score(self, X, y, sample_weight=None): - """Return the coefficient of determination R^2 of the prediction. - - The coefficient R^2 is defined as (1 - u/v), where u is the residual - sum of squares ((y_true - y_pred) ** 2).sum() and v is the total - sum of squares ((y_true - y_true.mean()) ** 2).sum(). 
- The best possible score is 1.0 and it can be negative (because the - model can be arbitrarily worse). A constant model that always - predicts the expected value of y, disregarding the input features, - would get a R^2 score of 0.0. + """Return the coefficient of determination :math:`R^2` of the + prediction. + + The coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`, + where :math:`u` is the residual sum of squares ``((y_true - y_pred) + ** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true - + y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it + can be negative (because the model can be arbitrarily worse). A + constant model that always predicts the expected value of `y`, + disregarding the input features, would get a :math:`R^2` score of + 0.0. Parameters ---------- X : array-like of shape (n_samples, n_features) - Test samples. For some estimators this may be a - precomputed kernel matrix or a list of generic objects instead, - shape = (n_samples, n_samples_fitted), - where n_samples_fitted is the number of - samples used in the fitting for the estimator. + Test samples. For some estimators this may be a precomputed + kernel matrix or a list of generic objects instead, with shape + ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` + is the number of samples used in the fitting for the estimator. y : array-like of shape (n_samples,) or (n_samples, n_outputs) - True values for X. + True values for `X`. sample_weight : array-like of shape (n_samples,), default=None Sample weights. @@ -531,11 +532,11 @@ def score(self, X, y, sample_weight=None): Returns ------- score : float - R^2 of self.predict(X) wrt. y. + :math:`R^2` of ``self.predict(X)`` wrt. `y`. 
Notes ----- - The R2 score used when calling ``score`` on a regressor uses + The :math:`R^2` score used when calling ``score`` on a regressor uses ``multioutput='uniform_average'`` from version 0.23 to keep consistent with default value of :func:`~sklearn.metrics.r2_score`. This influences the ``score`` method of all the multioutput @@ -557,7 +558,7 @@ class ClusterMixin: def fit_predict(self, X, y=None): """ - Perform clustering on X and returns cluster labels. + Perform clustering on `X` and returns cluster labels. Parameters ---------- @@ -569,7 +570,7 @@ def fit_predict(self, X, y=None): Returns ------- - labels : ndarray of shape (n_samples,) + labels : ndarray of shape (n_samples,), dtype=np.int64 Cluster labels. """ # non-optimized default implementation; override when a better @@ -579,7 +580,7 @@ def fit_predict(self, X, y=None): class BiclusterMixin: - """Mixin class for all bicluster estimators in scikit-learn""" + """Mixin class for all bicluster estimators in scikit-learn.""" @property def biclusters_(self): @@ -669,7 +670,8 @@ def fit_transform(self, X, y=None, **fit_params): (n_samples, n_features) Input samples. - y : ndarray of shape (n_samples,), default=None + y : {array-like, sparse matrix, dataframe} of shape (n_samples,) \ + or (n_samples, n_outputs), default=None Target values (None for unsupervised transformations). **fit_params : dict @@ -695,7 +697,7 @@ class DensityMixin: _estimator_type = "DensityEstimator" def score(self, X, y=None): - """Return the score of the model on the data X + """Return the score of the model on the data X. 
Parameters ---------- From 8c0c1bb87d2c2577e46c72ef51384f7b1087e058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Tue, 9 Jun 2020 22:14:53 +0200 Subject: [PATCH 2/4] DOC More changes --- sklearn/base.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 318dd1f1d7936..952435f383613 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -50,11 +50,11 @@ def clone(estimator, *, safe=True): Parameters ---------- - estimator : {list, tuple, set} of estimator objects or estimator object + estimator : list, tuple or set of BaseEstimator The estimator or group of estimators to be cloned. safe : bool, default=True - If safe is false, clone will fall back to a deep copy on objects + If safe is False, clone will fall back to a deep copy on objects that are not estimators. """ @@ -219,9 +219,9 @@ def set_params(self, **params): Set the parameters of this estimator. The method works on simple estimators as well as on nested objects - (such as pipelines). The latter have parameters of the form - ``<component>__<parameter>`` so that it's possible to update each - component of a nested object. + (such as :class:`~sklearn.pipeline.Pipeline`). The latter have + parameters of the form ``<component>__<parameter>`` so that it's + possible to update each component of a nested object. Parameters ---------- @@ -519,7 +519,7 @@ def score(self, X, y, sample_weight=None): ---------- X : array-like of shape (n_samples, n_features) Test samples. For some estimators this may be a precomputed - kernel matrix or a list of generic objects instead, with shape + kernel matrix or a list of generic objects instead with shape ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted`` is the number of samples used in the fitting for the estimator. @@ -661,8 +661,8 @@ def fit_transform(self, X, y=None, **fit_params): """ Fit to data, then transform it. 
- Fits transformer to X and y with optional parameters fit_params - and returns a transformed version of X. + Fits transformer to `X` and `y` with optional parameters `fit_params` + and returns a transformed version of `X`. Parameters ---------- @@ -778,7 +778,7 @@ def is_regressor(estimator): Parameters ---------- - estimator : object + estimator : BaseEstimator Estimator object to test. Returns @@ -794,7 +794,7 @@ def is_outlier_detector(estimator): Parameters ---------- - estimator : object + estimator : BaseEstimator Estimator object to test. Returns From c416f2bde502f2ea599c7266252870f9d4ddba95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Wed, 10 Jun 2020 17:55:41 +0200 Subject: [PATCH 3/4] DOC Apply suggested changes --- sklearn/base.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 952435f383613..7f2b8965145a5 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -50,7 +50,7 @@ def clone(estimator, *, safe=True): Parameters ---------- - estimator : list, tuple or set of BaseEstimator + estimator : {list, tuple, set} of BaseEstimator or a single BaseEstimator The estimator or group of estimators to be cloned. safe : bool, default=True @@ -591,7 +591,7 @@ def biclusters_(self): return self.rows_, self.columns_ def get_indices(self, i): - """Row and column indices of the i'th bicluster. + """Row and column indices of the `i`'th bicluster. Only works if ``rows_`` and ``columns_`` attributes exist. @@ -613,7 +613,7 @@ def get_indices(self, i): return np.nonzero(rows)[0], np.nonzero(columns)[0] def get_shape(self, i): - """Shape of the i'th bicluster. + """Shape of the `i`'th bicluster. Parameters ---------- @@ -622,8 +622,11 @@ def get_shape(self, i): Returns ------- - shape : tuple (int, int) - Number of rows and columns (resp.) in the bicluster. + n_rows : int + Number of rows in the bicluster. 
+ + n_cols : int + Number of columns in the bicluster. """ indices = self.get_indices(i) return tuple(len(i) for i in indices) @@ -635,13 +638,13 @@ def get_submatrix(self, i, data): ---------- i : int The index of the cluster. - data : array-like + data : array-like of shape (n_samples, n_features) The data. Returns ------- - submatrix : ndarray - The submatrix corresponding to bicluster i. + submatrix : ndarray of shape (n_rows, n_cols) + The submatrix corresponding to bicluster `i`. Notes ----- @@ -666,12 +669,11 @@ def fit_transform(self, X, y=None, **fit_params): Parameters ---------- - X : {array-like, sparse matrix, dataframe} of shape \ - (n_samples, n_features) + X : array-like of shape (n_samples, n_features) Input samples. - y : {array-like, sparse matrix, dataframe} of shape (n_samples,) \ - or (n_samples, n_outputs), default=None + y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ + default=None Target values (None for unsupervised transformations). **fit_params : dict @@ -697,11 +699,12 @@ class DensityMixin: _estimator_type = "DensityEstimator" def score(self, X, y=None): - """Return the score of the model on the data X. + """Return the score of the model on the data `X`. Parameters ---------- X : array-like of shape (n_samples, n_features) + Test samples. y : Ignored Not used, present for API consistency by convention. 
From a4d26a16e3c89163fdd4664701bc56479e4eb265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Carlos=20Alfaro=20Jim=C3=A9nez?= Date: Mon, 15 Jun 2020 18:06:30 +0200 Subject: [PATCH 4/4] MNT Use estimator instance instead of BaseEstimator --- sklearn/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 7f2b8965145a5..d94099436abe8 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -50,7 +50,8 @@ def clone(estimator, *, safe=True): Parameters ---------- - estimator : {list, tuple, set} of BaseEstimator or a single BaseEstimator + estimator : {list, tuple, set} of estimator instance or a single \ + estimator instance The estimator or group of estimators to be cloned. safe : bool, default=True @@ -230,7 +231,7 @@ def set_params(self, **params): Returns ------- - self : BaseEstimator + self : estimator instance Estimator instance. """ if not params: @@ -781,7 +782,7 @@ def is_regressor(estimator): Parameters ---------- - estimator : BaseEstimator + estimator : estimator instance Estimator object to test. Returns @@ -797,7 +798,7 @@ def is_outlier_detector(estimator): Parameters ---------- - estimator : BaseEstimator + estimator : estimator instance Estimator object to test. Returns