DOC Fix documentation of the base module #17548


Merged · 5 commits · Aug 21, 2020
96 changes: 51 additions & 45 deletions sklearn/base.py
@@ -50,11 +50,12 @@ def clone(estimator, *, safe=True):

Parameters
----------
-estimator : {list, tuple, set} of estimator objects or estimator object
+estimator : {list, tuple, set} of estimator instance or a single \
+    estimator instance
The estimator or group of estimators to be cloned.

safe : bool, default=True
-If safe is false, clone will fall back to a deep copy on objects
+If safe is False, clone will fall back to a deep copy on objects
that are not estimators.

"""
@@ -146,7 +147,7 @@ def _pprint(params, offset=0, printer=repr):


class BaseEstimator:
"""Base class for all estimators in scikit-learn
"""Base class for all estimators in scikit-learn.

Notes
-----
@@ -194,7 +195,7 @@ def get_params(self, deep=True):

Returns
-------
-params : mapping of string to any
+params : dict
Parameter names mapped to their values.
"""
out = dict()
@@ -219,9 +220,9 @@ def set_params(self, **params):
Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects
-(such as pipelines). The latter have parameters of the form
-``<component>__<parameter>`` so that it's possible to update each
-component of a nested object.
+(such as :class:`~sklearn.pipeline.Pipeline`). The latter have
+parameters of the form ``<component>__<parameter>`` so that it's
+possible to update each component of a nested object.

Parameters
----------
@@ -230,7 +231,7 @@ def set_params(self, **params):

Returns
-------
-self : object
+self : estimator instance
Estimator instance.
"""
if not params:
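To make the ``<component>__<parameter>`` convention concrete, a minimal sketch (not part of the diff; the step names "scaler" and "clf" are invented for illustration):

# Hypothetical pipeline illustrating nested parameter access.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

pipe = Pipeline([("scaler", StandardScaler()), ("clf", SVC())])
pipe.set_params(clf__C=10)                        # <component>__<parameter>
assert pipe.get_params(deep=True)["clf__C"] == 10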
@@ -481,15 +482,15 @@ def score(self, X, y, sample_weight=None):
Test samples.

y : array-like of shape (n_samples,) or (n_samples, n_outputs)
-True labels for X.
+True labels for `X`.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.

Returns
-------
score : float
-Mean accuracy of self.predict(X) wrt. y.
+Mean accuracy of ``self.predict(X)`` wrt. `y`.
"""
from .metrics import accuracy_score
return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
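A minimal check of the equivalence implemented above (toy data, illustrative only, not part of the diff):

# ClassifierMixin.score is the mean accuracy of self.predict(X) wrt. y.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(X, y)
assert clf.score(X, y) == accuracy_score(y, clf.predict(X))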
@@ -503,39 +504,40 @@ class RegressorMixin:
_estimator_type = "regressor"

def score(self, X, y, sample_weight=None):
"""Return the coefficient of determination R^2 of the prediction.

The coefficient R^2 is defined as (1 - u/v), where u is the residual
sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
sum of squares ((y_true - y_true.mean()) ** 2).sum().
The best possible score is 1.0 and it can be negative (because the
model can be arbitrarily worse). A constant model that always
predicts the expected value of y, disregarding the input features,
would get a R^2 score of 0.0.
"""Return the coefficient of determination :math:`R^2` of the
prediction.

The coefficient :math:`R^2` is defined as :math:`(1 - \\frac{u}{v})`,
where :math:`u` is the residual sum of squares ``((y_true - y_pred)
** 2).sum()`` and :math:`v` is the total sum of squares ``((y_true -
y_true.mean()) ** 2).sum()``. The best possible score is 1.0 and it
can be negative (because the model can be arbitrarily worse). A
constant model that always predicts the expected value of `y`,
disregarding the input features, would get a :math:`R^2` score of
0.0.

Parameters
----------
X : array-like of shape (n_samples, n_features)
-Test samples. For some estimators this may be a
-precomputed kernel matrix or a list of generic objects instead,
-shape = (n_samples, n_samples_fitted),
-where n_samples_fitted is the number of
-samples used in the fitting for the estimator.
+Test samples. For some estimators this may be a precomputed
+kernel matrix or a list of generic objects instead with shape
+``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
+is the number of samples used in the fitting for the estimator.

y : array-like of shape (n_samples,) or (n_samples, n_outputs)
-True values for X.
+True values for `X`.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.

Returns
-------
score : float
-R^2 of self.predict(X) wrt. y.
+:math:`R^2` of ``self.predict(X)`` wrt. `y`.

Notes
-----
-The R2 score used when calling ``score`` on a regressor uses
+The :math:`R^2` score used when calling ``score`` on a regressor uses
``multioutput='uniform_average'`` from version 0.23 to keep consistent
with default value of :func:`~sklearn.metrics.r2_score`.
This influences the ``score`` method of all the multioutput
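A worked instance of the definition quoted in this docstring (illustrative only, not part of the diff; the arrays are made up):

# R^2 = 1 - u/v, with u the residual and v the total sum of squares.
import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

u = ((y_true - y_pred) ** 2).sum()         # residual sum of squares
v = ((y_true - y_true.mean()) ** 2).sum()  # total sum of squares
assert np.isclose(1 - u / v, r2_score(y_true, y_pred))  # ~0.949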
@@ -557,7 +559,7 @@ class ClusterMixin:

def fit_predict(self, X, y=None):
"""
-Perform clustering on X and returns cluster labels.
+Perform clustering on `X` and returns cluster labels.

Parameters
----------
@@ -569,7 +571,7 @@ def fit_predict(self, X, y=None):

Returns
-------
-labels : ndarray of shape (n_samples,)
+labels : ndarray of shape (n_samples,), dtype=np.int64
Cluster labels.
"""
# non-optimized default implementation; override when a better
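A sketch of the default implementation this comment refers to: fit_predict fits the clusterer on X and returns its labels_ attribute (KMeans chosen arbitrarily, not part of the diff):

import numpy as np
from sklearn.cluster import KMeans

X = np.array([[0, 0], [0, 1], [10, 10], [10, 11]])
km = KMeans(n_clusters=2, n_init=10, random_state=0)
labels = km.fit_predict(X)            # equivalent to fit(X) then labels_
assert np.array_equal(labels, km.labels_)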
@@ -579,7 +581,7 @@ def fit_predict(self, X, y=None):


class BiclusterMixin:
"""Mixin class for all bicluster estimators in scikit-learn"""
"""Mixin class for all bicluster estimators in scikit-learn."""

@property
def biclusters_(self):
@@ -590,7 +592,7 @@ def biclusters_(self):
return self.rows_, self.columns_

def get_indices(self, i):
"""Row and column indices of the i'th bicluster.
"""Row and column indices of the `i`'th bicluster.

Only works if ``rows_`` and ``columns_`` attributes exist.

@@ -612,7 +614,7 @@ def get_indices(self, i):
return np.nonzero(rows)[0], np.nonzero(columns)[0]

def get_shape(self, i):
"""Shape of the i'th bicluster.
"""Shape of the `i`'th bicluster.

Parameters
----------
@@ -621,8 +623,11 @@ def get_shape(self, i):

Returns
-------
-shape : tuple (int, int)
-Number of rows and columns (resp.) in the bicluster.
+n_rows : int
+Number of rows in the bicluster.
+
+n_cols : int
+Number of columns in the bicluster.
Comment on lines +626 to +630

Member: I'm not sure if this is less confusing than the ``shape : tuple`` for the users. WDYT @NicolasHug

NicolasHug (Member) on Jun 11, 2020: I think in general we separate the entries like that (numpy does it too). For 2-tuples it's OK to merge them too, IMHO.

Member (Author): I decided to separate it into two entries because that is the approach used in the get_indices method. Nevertheless, LGTM with either of these approaches. Which approach do you prefer?
"""
indices = self.get_indices(i)
return tuple(len(i) for i in indices)
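To ground the discussion above, a toy sketch of the bicluster accessors with hand-made indicator arrays (the ToyBicluster class is invented for illustration and is not part of the diff):

import numpy as np
from sklearn.base import BiclusterMixin

class ToyBicluster(BiclusterMixin):
    """Toy carrier for rows_/columns_ of shape (n_biclusters, n_rows/n_cols)."""
    def __init__(self, rows, columns):
        self.rows_ = rows
        self.columns_ = columns

data = np.arange(12).reshape(3, 4)
bic = ToyBicluster(np.array([[True, False, True]]),         # rows 0 and 2
                   np.array([[False, True, True, False]]))  # cols 1 and 2

print(bic.get_indices(0))           # (array([0, 2]), array([1, 2]))
print(bic.get_shape(0))             # (2, 2), i.e. n_rows=2, n_cols=2
print(bic.get_submatrix(0, data))   # [[ 1  2] [ 9 10]]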
@@ -634,13 +639,13 @@ def get_submatrix(self, i, data):
----------
i : int
The index of the cluster.
-data : array-like
+data : array-like of shape (n_samples, n_features)
The data.

Returns
-------
-submatrix : ndarray
-The submatrix corresponding to bicluster i.
+submatrix : ndarray of shape (n_rows, n_cols)
+The submatrix corresponding to bicluster `i`.

Notes
-----
@@ -660,16 +665,16 @@ def fit_transform(self, X, y=None, **fit_params):
"""
Fit to data, then transform it.

-Fits transformer to X and y with optional parameters fit_params
-and returns a transformed version of X.
+Fits transformer to `X` and `y` with optional parameters `fit_params`
+and returns a transformed version of `X`.

Parameters
----------
-X : {array-like, sparse matrix, dataframe} of shape \
-    (n_samples, n_features)
+X : array-like of shape (n_samples, n_features)
Input samples.

-y : ndarray of shape (n_samples,), default=None
+y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
+    default=None
Target values (None for unsupervised transformations).

**fit_params : dict
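A small sketch of the shortcut this method provides, using StandardScaler as an arbitrary transformer (not part of the diff):

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array([[0.0], [2.0], [4.0]])
Xt = StandardScaler().fit_transform(X)  # fit to X, then transform X
assert np.allclose(Xt, StandardScaler().fit(X).transform(X))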
@@ -695,11 +700,12 @@ class DensityMixin:
_estimator_type = "DensityEstimator"

def score(self, X, y=None):
"""Return the score of the model on the data X
"""Return the score of the model on the data `X`.

Parameters
----------
X : array-like of shape (n_samples, n_features)
+Test samples.

y : Ignored
Not used, present for API consistency by convention.
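A hedged illustration of a density estimator's score (KernelDensity chosen arbitrarily, not part of the diff; its score is the total log-likelihood of X):

import numpy as np
from sklearn.neighbors import KernelDensity

X = np.array([[0.0], [0.5], [1.0]])
kde = KernelDensity(bandwidth=0.5).fit(X)
assert np.isclose(kde.score(X), kde.score_samples(X).sum())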
@@ -776,7 +782,7 @@ def is_regressor(estimator):

Parameters
----------
-estimator : object
+estimator : estimator instance
Estimator object to test.

Returns
@@ -792,7 +798,7 @@ def is_outlier_detector(estimator):

Parameters
----------
-estimator : object
+estimator : estimator instance
Estimator object to test.

Returns
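Finally, a quick sketch of the estimator-type helpers touched by these hunks (is_classifier lives in the same module; estimator choices are arbitrary, not part of the diff):

from sklearn.base import is_classifier, is_outlier_detector, is_regressor
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression, LogisticRegression

assert is_regressor(LinearRegression())
assert not is_regressor(LogisticRegression())
assert is_classifier(LogisticRegression())
assert is_outlier_detector(IsolationForest())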