From 1a7f2fb0939b0386dc1da892df701de8a4cf91af Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Sat, 6 Jun 2020 15:57:26 -0400
Subject: [PATCH 01/42] Update default doc string values

---
 sklearn/linear_model/_huber.py              | 12 +++----
 sklearn/linear_model/_least_angle.py        |  4 +--
 sklearn/linear_model/_omp.py                | 36 +++++++++----------
 sklearn/linear_model/_passive_aggressive.py | 39 +++++++++++----------
 sklearn/linear_model/_ransac.py             | 22 ++++++------
 sklearn/linear_model/_theil_sen.py          | 16 ++++-----
 6 files changed, 65 insertions(+), 64 deletions(-)

diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py
index 77e6ff944b78d..521d3f1233ff9 100644
--- a/sklearn/linear_model/_huber.py
+++ b/sklearn/linear_model/_huber.py
@@ -142,29 +142,29 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
 
     Parameters
     ----------
-    epsilon : float, greater than 1.0, default 1.35
+    epsilon : float, greater than 1.0, default=1.35
         The parameter epsilon controls the number of samples that should be
         classified as outliers. The smaller the epsilon, the more robust it is
         to outliers.
 
-    max_iter : int, default 100
+    max_iter : int, default=100
         Maximum number of iterations that
         ``scipy.optimize.minimize(method="L-BFGS-B")`` should run for.
 
-    alpha : float, default 0.0001
+    alpha : float, default=0.0001
         Regularization parameter.
 
-    warm_start : bool, default False
+    warm_start : bool, default=False
         This is useful if the stored attributes of a previously used model
         has to be reused. If set to False, then the coefficients will
         be rewritten for every call to fit.
         See :term:`the Glossary <warm_start>`.
 
-    fit_intercept : bool, default True
+    fit_intercept : bool, default=True
         Whether or not to fit the intercept. This can be set to False
         if the data is already centered around the origin.
 
-    tol : float, default 1e-5
+    tol : float, default=1e-05
         The iteration will stop when
         ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``
         where pg_i is the i-th component of the projected gradient.
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index 255baacea9a59..6af700a6538ad 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -1028,7 +1028,7 @@ class LassoLars(Lars):
     max_iter : int, default=500
         Maximum number of iterations to perform.
 
-    eps : float, optional
+    eps : float, default=np.finfo(float).eps
         The machine-precision regularization in the computation of the
         Cholesky diagonal factors. Increase this for very ill-conditioned
         systems. Unlike the ``tol`` parameter in some iterative
@@ -1060,7 +1060,7 @@ class LassoLars(Lars):
         `y` values, to satisfy the model's assumption of
         one-at-a-time computations. Might help with stability.
 
-    random_state : int, RandomState instance or None (default)
+    random_state : int, RandomState instance, default=None
         Determines random number generation for jittering. Pass an int
         for reproducible output across multiple function calls.
         See :term:`Glossary <random_state>`. Ignored if `jitter` is None.
diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py
index 44371e9fa76e7..0287d4ebc2d5c 100644
--- a/sklearn/linear_model/_omp.py
+++ b/sklearn/linear_model/_omp.py
@@ -289,27 +289,27 @@ def orthogonal_mp(X, y, *, n_nonzero_coefs=None, tol=None, precompute=False,
     y : array, shape (n_samples,) or (n_samples, n_targets)
         Input targets
 
-    n_nonzero_coefs : int
+    n_nonzero_coefs : int, default=None
         Desired number of non-zero entries in the solution. If None (by
         default) this value is set to 10% of n_features.
 
-    tol : float
+    tol : float, default=None
         Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
 
-    precompute : {True, False, 'auto'},
+    precompute : {True, False, 'auto'}, default=False
         Whether to perform precomputations. Improves performance when n_targets
         or n_samples is very large.
 
-    copy_X : bool, optional
+    copy_X : bool, default=True
         Whether the design matrix X must be copied by the algorithm. A false
         value is only helpful if X is already Fortran-ordered, otherwise a
         copy is made anyway.
 
-    return_path : bool, optional. Default: False
+    return_path : bool, default=False
         Whether to return every value of the nonzero coefficients along the
         forward path. Useful for cross-validation.
 
-    return_n_iter : bool, optional default False
+    return_n_iter : bool, default=False
         Whether or not to return the number of iterations.
 
     Returns
@@ -427,30 +427,30 @@ def orthogonal_mp_gram(Gram, Xy, *, n_nonzero_coefs=None, tol=None,
     Xy : array, shape (n_features,) or (n_features, n_targets)
         Input targets multiplied by X: X.T * y
 
-    n_nonzero_coefs : int
+    n_nonzero_coefs : int, default=None
         Desired number of non-zero entries in the solution. If None (by
         default) this value is set to 10% of n_features.
 
-    tol : float
+    tol : float, default=None
         Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
 
-    norms_squared : array-like, shape (n_targets,)
+    norms_squared : array-like, shape (n_targets,), default=None
         Squared L2 norms of the lines of y. Required if tol is not None.
 
-    copy_Gram : bool, optional
+    copy_Gram : bool, default=True
         Whether the gram matrix must be copied by the algorithm. A false
         value is only helpful if it is already Fortran-ordered, otherwise a
         copy is made anyway.
 
-    copy_Xy : bool, optional
+    copy_Xy : bool, default=True
         Whether the covariance vector Xy must be copied by the algorithm.
         If False, it may be overwritten.
 
-    return_path : bool, optional. Default: False
+    return_path : bool, default=False
         Whether to return every value of the nonzero coefficients along the
         forward path. Useful for cross-validation.
 
-    return_n_iter : bool, optional default False
+    return_n_iter : bool, default=False
         Whether or not to return the number of iterations.
 
     Returns
@@ -550,19 +550,19 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
 
     Parameters
     ----------
-    n_nonzero_coefs : int, optional
+    n_nonzero_coefs : int, default=None
         Desired number of non-zero entries in the solution. If None (by
         default) this value is set to 10% of n_features.
 
-    tol : float, optional
+    tol : float, default=None
         Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
 
-    fit_intercept : boolean, optional
+    fit_intercept : boolean, default=True
         whether to calculate the intercept for this model. If set
         to false, no intercept will be used in calculations
         (i.e. data is expected to be centered).
 
-    normalize : boolean, optional, default True
+    normalize : boolean, default=True
         This parameter is ignored when ``fit_intercept`` is set to False.
         If True, the regressors X will be normalized before regression by
         subtracting the mean and dividing by the l2-norm.
@@ -570,7 +570,7 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
         :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
         on an estimator with ``normalize=False``.
 
-    precompute : {True, False, 'auto'}, default 'auto'
+    precompute : {True, False, 'auto'}, default='auto'
         Whether to use a precomputed Gram and Xy matrix to speed up
         calculations. Improves performance when :term:`n_targets` or
         :term:`n_samples` is very large. Note that if you already have such
diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py
index 22c47fb1fcf07..55a8d1d0b2304 100644
--- a/sklearn/linear_model/_passive_aggressive.py
+++ b/sklearn/linear_model/_passive_aggressive.py
@@ -15,21 +15,21 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     Parameters
     ----------
 
-    C : float
+    C : float, default=1.0
         Maximum step size (regularization). Defaults to 1.0.
 
-    fit_intercept : bool, default=False
+    fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
         data is assumed to be already centered.
 
-    max_iter : int, optional (default=1000)
+    max_iter : int, default=1000
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
 
         .. versionadded:: 0.19
 
-    tol : float or None, optional (default=1e-3)
+    tol : float or None, default=1e-3
         The stopping criterion. If it is not None, the iterations will stop
         when (loss > previous_loss - tol).
 
@@ -59,15 +59,15 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
     shuffle : bool, default=True
         Whether or not the training data should be shuffled after each epoch.
 
-    verbose : integer, optional
+    verbose : integer, default=0
         The verbosity level
 
-    loss : string, optional
+    loss : string, default="hinge"
         The loss function to be used:
         hinge: equivalent to PA-I in the reference paper.
         squared_hinge: equivalent to PA-II in the reference paper.
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int or None, default=None
         The number of CPUs to use to do the OVA (One Versus All, for
         multi-class problems) computation.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
@@ -80,7 +80,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         function calls.
         See :term:`Glossary <random_state>`.
 
-    warm_start : bool, optional
+    warm_start : bool, default=False
         When set to True, reuse the solution of the previous call to fit as
         initialization, otherwise, just erase the previous solution.
         See :term:`the Glossary <warm_start>`.
@@ -89,7 +89,8 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         result in a different solution than when calling fit a single time
         because of the way the data is shuffled.
 
-    class_weight : dict, {class_label: weight} or "balanced" or None, optional
+    class_weight : dict, {class_label: weight} or "balanced" or None, \
+            default=None
         Preset for the class_weight fit parameter.
 
         Weights associated with classes. If not given, all classes
@@ -102,7 +103,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier):
         .. versionadded:: 0.17
            parameter *class_weight* to automatically weight samples.
 
-    average : bool or int, optional
+    average : bool or int, default=False
         When set to True, computes the averaged SGD weights and stores the
         result in the ``coef_`` attribute. If set to an int greater than 1,
         averaging will begin once the total number of samples seen reaches
@@ -266,21 +267,21 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
     Parameters
     ----------
 
-    C : float
+    C : float, default=1.0
         Maximum step size (regularization). Defaults to 1.0.
 
-    fit_intercept : bool
+    fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
         data is assumed to be already centered. Defaults to True.
 
-    max_iter : int, optional (default=1000)
+    max_iter : int, default=1000
         The maximum number of passes over the training data (aka epochs).
         It only impacts the behavior in the ``fit`` method, and not the
         :meth:`partial_fit` method.
 
         .. versionadded:: 0.19
 
-    tol : float or None, optional (default=1e-3)
+    tol : float or None, default=1e-3
         The stopping criterion. If it is not None, the iterations will stop
         when (loss > previous_loss - tol).
 
@@ -310,16 +311,16 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
     shuffle : bool, default=True
         Whether or not the training data should be shuffled after each epoch.
 
-    verbose : integer, optional
+    verbose : integer, default=0
         The verbosity level
 
-    loss : string, optional
+    loss : string, default="epsilon_insensitive"
         The loss function to be used:
         epsilon_insensitive: equivalent to PA-I in the reference paper.
         squared_epsilon_insensitive: equivalent to PA-II in the reference
         paper.
 
-    epsilon : float
+    epsilon : float, default=0.1
         If the difference between the current prediction and the correct label
         is below this threshold, the model is not updated.
 
@@ -329,7 +330,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
         function calls.
         See :term:`Glossary <random_state>`.
 
-    warm_start : bool, optional
+    warm_start : bool, default=False
         When set to True, reuse the solution of the previous call to fit as
         initialization, otherwise, just erase the previous solution.
         See :term:`the Glossary <warm_start>`.
@@ -338,7 +339,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor):
         result in a different solution than when calling fit a single time
         because of the way the data is shuffled.
 
-    average : bool or int, optional
+    average : bool or int, default=False
         When set to True, computes the averaged SGD weights and stores the
         result in the ``coef_`` attribute. If set to an int greater than 1,
         averaging will begin once the total number of samples seen reaches
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 133bafcc6cdee..3ce71e4b4b809 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -65,7 +65,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
 
     Parameters
     ----------
-    base_estimator : object, optional
+    base_estimator : object, default=None
         Base estimator object which implements the following methods:
 
          * `fit(X, y)`: Fit model to given training data and target values.
@@ -83,7 +83,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
         Note that the current implementation only supports regression
         estimators.
 
-    min_samples : int (>= 1) or float ([0, 1]), optional
+    min_samples : int (>= 1) or float ([0, 1]), default=None
         Minimum number of samples chosen randomly from original data. Treated
         as an absolute number of samples for `min_samples >= 1`, treated as a
         relative number `ceil(min_samples * X.shape[0]`) for
@@ -92,17 +92,17 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
         ``sklearn.linear_model.LinearRegression()`` estimator is assumed and
         `min_samples` is chosen as ``X.shape[1] + 1``.
 
-    residual_threshold : float, optional
+    residual_threshold : float, default=None
         Maximum residual for a data sample to be classified as an inlier.
         By default the threshold is chosen as the MAD (median absolute
         deviation) of the target values `y`.
 
-    is_data_valid : callable, optional
+    is_data_valid : callable, default=None
         This function is called with the randomly selected data before the
         model is fitted to it: `is_data_valid(X, y)`. If its return value is
         False the current randomly chosen sub-sample is skipped.
 
-    is_model_valid : callable, optional
+    is_model_valid : callable, default=None
         This function is called with the estimated model and the randomly
         selected data: `is_model_valid(model, X, y)`. If its return value is
         False the current randomly chosen sub-sample is skipped.
@@ -110,23 +110,23 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
         with `is_data_valid`. `is_model_valid` should therefore only be used if
         the estimated model is needed for making the rejection decision.
 
-    max_trials : int, optional
+    max_trials : int, default=100
         Maximum number of iterations for random sample selection.
 
-    max_skips : int, optional
+    max_skips : int, default=np.inf
         Maximum number of iterations that can be skipped due to finding zero
         inliers or invalid data defined by ``is_data_valid`` or invalid models
         defined by ``is_model_valid``.
 
         .. versionadded:: 0.19
 
-    stop_n_inliers : int, optional
+    stop_n_inliers : int, default=np.inf
         Stop iteration if at least this number of inliers are found.
 
-    stop_score : float, optional
+    stop_score : float, default=np.inf
         Stop iteration if score is greater equal than this threshold.
 
-    stop_probability : float in range [0, 1], optional
+    stop_probability : float in range [0, 1], default=0.99
         RANSAC iteration stops if at least one outlier-free set of the training
         data is sampled in RANSAC. This requires to generate at least N
         samples (iterations)::
@@ -137,7 +137,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
         as 0.99 (the default) and e is the current fraction of inliers w.r.t.
         the total number of samples.
 
-    loss : string, callable, optional, default "absolute_loss"
+    loss : string, callable, default='absolute_loss'
         String inputs, "absolute_loss" and "squared_loss" are supported which
         find the absolute loss and squared loss per sample
         respectively.
diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py
index 28d2dba3f8719..722c69cddf8c6 100644
--- a/sklearn/linear_model/_theil_sen.py
+++ b/sklearn/linear_model/_theil_sen.py
@@ -209,14 +209,14 @@ class TheilSenRegressor(RegressorMixin, LinearModel):
 
     Parameters
     ----------
-    fit_intercept : boolean, optional, default True
+    fit_intercept : boolean, default=True
         Whether to calculate the intercept for this model. If set
         to false, no intercept will be used in calculations.
 
-    copy_X : boolean, optional, default True
+    copy_X : boolean, default=True
         If True, X will be copied; else, it may be overwritten.
 
-    max_subpopulation : int, optional, default 1e4
+    max_subpopulation : int, default=1e4
         Instead of computing with a set of cardinality 'n choose k', where n is
         the number of samples and k is the number of subsamples (at least
         number of features), consider only a stochastic subpopulation of a
@@ -224,7 +224,7 @@ class TheilSenRegressor(RegressorMixin, LinearModel):
         For other than small problem sizes this parameter will determine
         memory usage and runtime if n_subsamples is not changed.
 
-    n_subsamples : int, optional, default None
+    n_subsamples : int, default=None
         Number of samples to calculate the parameters. This is at least the
         number of features (plus 1 if fit_intercept=True) and the number of
         samples as a maximum. A lower number leads to a higher breakdown
@@ -234,10 +234,10 @@ class TheilSenRegressor(RegressorMixin, LinearModel):
         If n_subsamples is set to n_samples, Theil-Sen is identical to least
         squares.
 
-    max_iter : int, optional, default 300
+    max_iter : int, default=300
         Maximum number of iterations for the calculation of spatial median.
 
-    tol : float, optional, default 1.e-3
+    tol : float, default=1.e-3
         Tolerance when calculating spatial median.
 
     random_state : int, RandomState instance, default=None
@@ -246,13 +246,13 @@ class TheilSenRegressor(RegressorMixin, LinearModel):
         multiple function calls.
         See :term:`Glossary <random_state>`
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int or None, default=None
         Number of CPUs to use during the cross validation.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
-    verbose : boolean, optional, default False
+    verbose : boolean, default=False
         Verbose mode when fitting the model.
 
     Attributes

From ca94a795ce59b8be582bc9c658cb10cac40f2165 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Sat, 6 Jun 2020 23:54:12 -0400
Subject: [PATCH 02/42] Update default docstrings for Metrics

---
 sklearn/metrics/_base.py                 |  2 +-
 sklearn/metrics/_classification.py       | 76 +++++++++----------
 sklearn/metrics/_ranking.py              | 34 ++++-----
 sklearn/metrics/_regression.py           | 26 ++++---
 sklearn/metrics/_scorer.py               | 10 +--
 sklearn/metrics/cluster/_bicluster.py    |  2 +-
 sklearn/metrics/cluster/_supervised.py   | 17 +++--
 sklearn/metrics/cluster/_unsupervised.py |  8 +-
 sklearn/metrics/pairwise.py              | 97 ++++++++++++------------
 9 files changed, 138 insertions(+), 134 deletions(-)

diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py
index 63c74c0663adb..5f09dc0f0dd27 100644
--- a/sklearn/metrics/_base.py
+++ b/sklearn/metrics/_base.py
@@ -156,7 +156,7 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
         Target scores corresponding to probability estimates of a sample
         belonging to a particular class
 
-    average : 'macro' or 'weighted', optional (default='macro')
+    average : 'macro' or 'weighted', default='macro'
         Determines the type of averaging performed on the pairwise binary
         metric scores
         ``'macro'``:
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 2ceccca65203e..c3d98ee51a439 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -140,7 +140,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Predicted labels, as returned by a classifier.
 
-    normalize : bool, optional (default=True)
+    normalize : bool, default=True
         If ``False``, return the number of correctly classified samples.
         Otherwise, return the fraction of correctly classified samples.
 
@@ -370,7 +370,7 @@ def multilabel_confusion_matrix(y_true, y_pred, *, sample_weight=None,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights
 
-    labels : array-like
+    labels : array-like, default=None
         A list of classes or column indices to select some (or to force
         inclusion of classes absent from the data)
 
@@ -568,12 +568,12 @@ class labels [2]_.
         Labels assigned by the second annotator. The kappa statistic is
         symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.
 
-    labels : array, shape = [n_classes], optional
+    labels : array, shape = [n_classes], default=None
         List of labels to index the matrix. This may be used to select a
         subset of labels. If None, all labels that appear at least once in
         ``y1`` or ``y2`` are used.
 
-    weights : str, optional
+    weights : str, default=None
         Weighting type to calculate the score. None means no weighted;
         "linear" means linear weighted; "quadratic" means quadratic weighted.
 
@@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Predicted labels, as returned by a classifier.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -650,14 +650,14 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,
         labels are column indices. By default, all labels in ``y_true`` and
         ``y_pred`` are used in sorted order.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
 
@@ -869,7 +869,7 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Predicted labels, as returned by a classifier.
 
-    normalize : bool, optional (default=True)
+    normalize : bool, default=True
         If ``False``, return the number of misclassifications.
         Otherwise, return the fraction of misclassifications.
 
@@ -948,7 +948,7 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -960,14 +960,14 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
         .. versionchanged:: 0.17
            parameter *labels* improved for multiclass problem.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
@@ -1073,7 +1073,7 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1,
     beta : float
         Determines the weight of recall in the combined score.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -1085,14 +1085,14 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1,
         .. versionchanged:: 0.17
            parameter *labels* improved for multiclass problem.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
@@ -1312,10 +1312,10 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    beta : float, 1.0 by default
+    beta : float, default=1.0
         The strength of recall versus precision in the F-score.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -1324,14 +1324,14 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None,
         labels are column indices. By default, all labels in ``y_true`` and
         ``y_pred`` are used in sorted order.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {'binary', 'micro', 'macro', 'samples', 'weighted'}, \
+            default=None
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
 
@@ -1527,7 +1527,7 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -1539,14 +1539,14 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1,
         .. versionchanged:: 0.17
            parameter *labels* improved for multiclass problem.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
@@ -1645,7 +1645,7 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, optional
+    labels : list, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a
@@ -1657,14 +1657,14 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
         .. versionchanged:: 0.17
            parameter *labels* improved for multiclass problem.
 
-    pos_label : str or int, 1 by default
+    pos_label : str or int, default=1
         The class to report if ``average='binary'`` and the data is binary.
         If the data are multiclass or multilabel, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
-                       'weighted']
+    average : {None, 'binary', 'micro', 'macro', 'samples', 'weighted'}, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
@@ -1838,21 +1838,21 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : array, shape = [n_labels]
+    labels : array, shape = [n_labels], default=None
         Optional list of label indices to include in the report.
 
-    target_names : list of strings
+    target_names : list of strings, default=None
         Optional display names matching the labels (same order).
 
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    digits : int
+    digits : int, default=2
         Number of digits for formatting output floating point values.
         When ``output_dict`` is ``True``, this will be ignored and the
         returned values will not be rounded.
 
-    output_dict : bool (default = False)
+    output_dict : bool, default=False
         If True, return output as dict
 
         .. versionadded:: 0.20
@@ -2145,18 +2145,18 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None,
         ordered alphabetically, as done by
         :class:`preprocessing.LabelBinarizer`.
 
-    eps : float
+    eps : float, default=1e-15
         Log loss is undefined for p=0 or p=1, so probabilities are
         clipped to max(eps, min(1 - eps, p)).
 
-    normalize : bool, optional (default=True)
+    normalize : bool, default=True
         If true, return the mean loss per sample.
         Otherwise, return the sum of the per-sample losses.
 
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    labels : array-like, optional (default=None)
+    labels : array-like, default=None
         If not provided, labels will be inferred from y_true. If ``labels``
         is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
         assumed to be binary and are inferred from ``y_true``.
@@ -2269,7 +2269,7 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     pred_decision : array, shape = [n_samples] or [n_samples, n_classes]
         Predicted decisions, as output by decision_function (floats).
 
-    labels : array, optional, default None
+    labels : array, default=None
         Contains all the labels for the problem. Used in multiclass hinge loss.
 
     sample_weight : array-like of shape (n_samples,), default=None
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 8085711246e56..93623d950be63 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -135,7 +135,7 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1,
         class, confidence values, or non-thresholded measure of decisions
         (as returned by "decision_function" on some classifiers).
 
-    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
+    average : {None, 'micro', 'macro', 'samples', 'weighted'}, default='macro'
         If ``None``, the scores for each class are returned. Otherwise,
         this determines the type of averaging performed on the data:
 
@@ -153,7 +153,7 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1,
 
         Will be ignored when ``y_true`` is binary.
 
-    pos_label : int or str (default=1)
+    pos_label : int or str, default=1
         The label of the positive class. Only applied to binary ``y_true``.
         For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1.
 
@@ -411,7 +411,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
         Target scores corresponding to probability estimates of a sample
         belonging to a particular class
 
-    labels : array, shape = [n_classes] or None, optional (default=None)
+    labels : array, shape = [n_classes] or None
         List of labels to index ``y_score`` used for multiclass. If ``None``,
         the lexical order of ``y_true`` is used to index ``y_score``.
 
@@ -424,7 +424,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
             Calculate metrics for the multiclass case using the one-vs-one
             approach.
 
-    average : 'macro' or 'weighted', optional (default='macro')
+    average : 'macro' or 'weighted'
         Determines the type of averaging performed on the pairwise binary
         metric scores
         ``'macro'``:
@@ -435,7 +435,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
             Calculate metrics for each label, taking into account the
             prevalence of the classes.
 
-    sample_weight : array-like of shape (n_samples,), default=None
+    sample_weight : array-like of shape (n_samples,)
         Sample weights.
 
     """
@@ -1075,15 +1075,15 @@ def _dcg_sample_scores(y_true, y_score, k=None,
         or non-thresholded measure of decisions (as returned by
         "decision_function" on some classifiers).
 
-    k : int, optional (default=None)
+    k : int, default=None
         Only consider the highest k scores in the ranking. If None, use all
         outputs.
 
-    log_base : float, optional (default=2)
+    log_base : float, default=2
         Base of the logarithm used for the discount. A low value means a
         sharper discount (top results are more important).
 
-    ignore_ties : bool, optional (default=False)
+    ignore_ties : bool, default=False
         Assume that there are no ties in y_score (which is likely to be the
         case if y_score is continuous) for efficiency gains.
 
@@ -1199,18 +1199,18 @@ def dcg_score(y_true, y_score, *, k=None,
         or non-thresholded measure of decisions (as returned by
         "decision_function" on some classifiers).
 
-    k : int, optional (default=None)
+    k : int, default=None
         Only consider the highest k scores in the ranking. If None, use all
         outputs.
 
-    log_base : float, optional (default=2)
+    log_base : float, default=2
         Base of the logarithm used for the discount. A low value means a
         sharper discount (top results are more important).
 
-    sample_weight : ndarray, shape (n_samples,), optional (default=None)
+    sample_weight : ndarray, shape (n_samples,), default=None
         Sample weights. If None, all samples are given the same weight.
 
-    ignore_ties : bool, optional (default=False)
+    ignore_ties : bool, default=False
         Assume that there are no ties in y_score (which is likely to be the
         case if y_score is continuous) for efficiency gains.
 
@@ -1303,11 +1303,11 @@ def _ndcg_sample_scores(y_true, y_score, k=None, ignore_ties=False):
         or non-thresholded measure of decisions (as returned by
         "decision_function" on some classifiers).
 
-    k : int, optional (default=None)
+    k : int, default=None
         Only consider the highest k scores in the ranking. If None, use all
         outputs.
 
-    ignore_ties : bool, optional (default=False)
+    ignore_ties : bool, default=False
         Assume that there are no ties in y_score (which is likely to be the
         case if y_score is continuous) for efficiency gains.
 
@@ -1356,14 +1356,14 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None,
         or non-thresholded measure of decisions (as returned by
         "decision_function" on some classifiers).
 
-    k : int, optional (default=None)
+    k : int, default=None
         Only consider the highest k scores in the ranking. If None, use all
         outputs.
 
-    sample_weight : ndarray, shape (n_samples,), optional (default=None)
+    sample_weight : ndarray, shape (n_samples,), default=None
         Sample weights. If None, all samples are given the same weight.
 
-    ignore_ties : bool, optional (default=False)
+    ignore_ties : bool, default=False
         Assume that there are no ties in y_score (which is likely to be the
         case if y_score is continuous) for efficiency gains.
 
diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index e805bdc099d1f..0168f49fdf476 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -136,11 +136,11 @@ def mean_absolute_error(y_true, y_pred, *,
     y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
         Estimated target values.
 
-    sample_weight : array-like of shape (n_samples,), optional
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average'] \
-                or array-like of shape (n_outputs)
+                or array-like of shape (n_outputs), default='uniform_average'
         Defines aggregating of multiple output values.
         Array-like value defines weights used to average errors.
 
@@ -208,11 +208,11 @@ def mean_squared_error(y_true, y_pred, *,
     y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
         Estimated target values.
 
-    sample_weight : array-like of shape (n_samples,), optional
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average'] \
-                or array-like of shape (n_outputs)
+                or array-like of shape (n_outputs), default='uniform_average'
         Defines aggregating of multiple output values.
         Array-like value defines weights used to average errors.
 
@@ -222,7 +222,7 @@ def mean_squared_error(y_true, y_pred, *,
         'uniform_average' :
             Errors of all outputs are averaged with uniform weight.
 
-    squared : boolean value, optional (default = True)
+    squared : boolean value, default=True
         If True returns MSE value, if False returns RMSE value.
 
     Returns
@@ -289,11 +289,11 @@ def mean_squared_log_error(y_true, y_pred, *,
     y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
         Estimated target values.
 
-    sample_weight : array-like of shape (n_samples,), optional
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average'] \
-            or array-like of shape (n_outputs)
+            or array-like of shape (n_outputs), default='uniform_average'
 
         Defines aggregating of multiple output values.
         Array-like value defines weights used to average errors.
@@ -358,7 +358,7 @@ def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average',
         Estimated target values.
 
     multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
-                (n_outputs,)
+                (n_outputs,), default='uniform_average'
         Defines aggregating of multiple output values. Array-like value defines
         weights used to average errors.
 
@@ -434,11 +434,12 @@ def explained_variance_score(y_true, y_pred, *,
     y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
         Estimated target values.
 
-    sample_weight : array-like of shape (n_samples,), optional
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average', \
-                'variance_weighted'] or array-like of shape (n_outputs)
+                'variance_weighted'] or array-like of shape (n_outputs), \
+                default='uniform_average'
         Defines aggregating of multiple output scores.
         Array-like value defines weights used to average scores.
 
@@ -529,11 +530,12 @@ def r2_score(y_true, y_pred, *, sample_weight=None,
     y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
         Estimated target values.
 
-    sample_weight : array-like of shape (n_samples,), optional
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average', \
-'variance_weighted'] or None or array-like of shape (n_outputs)
+                'variance_weighted'] or None or array-like of shape \
+                (n_outputs), default='uniform_average'
 
         Defines aggregating of multiple output scores.
         Array-like value defines weights used to average scores.
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index f116f76edb260..2508529b1fc39 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -150,7 +150,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like, optional (default=None)
+        sample_weight : array-like, default=None
             Sample weights.
 
         Returns
@@ -186,7 +186,7 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like, optional (default=None)
+        sample_weight : array-like, default=None
             Sample weights.
 
         Returns
@@ -371,12 +371,12 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
     estimator : estimator object implementing 'fit'
         The object to use to fit the data.
 
-    scoring : string, callable or None, optional, default: None
+    scoring : string, callable or None, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
         ``scorer(estimator, X, y)``.
 
-    allow_none : boolean, optional, default: False
+    allow_none : boolean, default=False
         If no scoring is specified and the estimator has no score function, we
         can either return None or raise an exception.
 
@@ -431,7 +431,7 @@ def _check_multimetric_scoring(estimator, scoring=None):
     estimator : sklearn estimator instance
         The estimator for which the scoring will be applied.
 
-    scoring : string, callable, list/tuple, dict or None, default: None
+    scoring : string, callable, list/tuple, dict or None, default=None
         A single string (see :ref:`scoring_parameter`) or a callable
         (see :ref:`scoring`) to evaluate the predictions on the test set.
 
diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py
index ac0d0a454a74a..8f967cee6264e 100644
--- a/sklearn/metrics/cluster/_bicluster.py
+++ b/sklearn/metrics/cluster/_bicluster.py
@@ -64,7 +64,7 @@ def consensus_score(a, b, *, similarity="jaccard"):
     b : (rows, columns)
         Another set of biclusters like ``a``.
 
-    similarity : string or function, optional, default: "jaccard"
+    similarity : string or function, default="jaccard"
         May be the string "jaccard" to use the Jaccard coefficient, or
         any function that takes four arguments, each of which is a 1d
         indicator vector: (a_rows, a_columns, b_rows, b_columns).
diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index d652737bd23c0..b561d0e5015a4 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -90,12 +90,12 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False):
     labels_pred : array-like of shape (n_samples,)
         Cluster labels to evaluate
 
-    eps : None or float, optional.
+    eps : None or float, default=None
         If a float, that value is added to all values in the contingency
         matrix. This helps to stop NaN propagation.
         If ``None``, nothing is adjusted.
 
-    sparse : boolean, optional.
+    sparse : boolean, default=False
         If True, return a sparse CSR continency matrix. If ``eps is not None``,
         and ``sparse is True``, will throw ValueError.
 
@@ -280,7 +280,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):
     labels_pred : array-like of shape (n_samples,)
         cluster labels to evaluate
 
-    beta : float
+    beta : float, default=1.0
         Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
         If ``beta`` is greater than 1, ``completeness`` is weighted more
         strongly in the calculation. If ``beta`` is less than 1,
@@ -498,7 +498,7 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0):
     labels_pred : array-like of shape (n_samples,)
         cluster labels to evaluate
 
-    beta : float
+    beta : float, default=1.0
         Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
         If ``beta`` is greater than 1, ``completeness`` is weighted more
         strongly in the calculation. If ``beta`` is less than 1,
@@ -602,7 +602,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
         A clustering of the data into disjoint subsets.
 
     contingency : {None, array, sparse matrix}, \
-                  shape = [n_classes_true, n_classes_pred]
+                  shape = [n_classes_true, n_classes_pred], \
+                  default=None
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
         used, with ``labels_true`` and ``labels_pred`` ignored.
@@ -689,7 +690,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, *,
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-    average_method : string, optional (default: 'arithmetic')
+    average_method : string, default='arithmetic'
         How to compute the normalizer in the denominator. Possible options
         are 'min', 'geometric', 'arithmetic', and 'max'.
 
@@ -809,7 +810,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, *,
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-    average_method : string, optional (default: 'arithmetic')
+    average_method : string, default='arithmetic'
         How to compute the normalizer in the denominator. Possible options
         are 'min', 'geometric', 'arithmetic', and 'max'.
 
@@ -909,7 +910,7 @@ def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):
     labels_pred : array, shape = (``n_samples``, )
         A clustering of the data into disjoint subsets.
 
-    sparse : bool
+    sparse : bool, default=False
         Compute contingency matrix internally with sparse matrix.
 
     Returns
diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py
index ce5563c4763d3..13ed169e69169 100644
--- a/sklearn/metrics/cluster/_unsupervised.py
+++ b/sklearn/metrics/cluster/_unsupervised.py
@@ -66,19 +66,19 @@ def silhouette_score(X, labels, *, metric='euclidean', sample_size=None,
     labels : array, shape = [n_samples]
          Predicted labels for each sample.
 
-    metric : string, or callable
+    metric : string, or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by :func:`metrics.pairwise.pairwise_distances
         <sklearn.metrics.pairwise.pairwise_distances>`. If X is the distance
         array itself, use ``metric="precomputed"``.
 
-    sample_size : int or None
+    sample_size : int or None, default=None
         The size of the sample to use when computing the Silhouette Coefficient
         on a random subset of the data.
         If ``sample_size is None``, no sampling is used.
 
-    random_state : int, RandomState instance or None, optional (default=None)
+    random_state : int, RandomState instance or None, default=None
         Determines random number generation for selecting a subset of samples.
         Used when ``sample_size is not None``.
         Pass an int for reproducible results across multiple function calls.
@@ -182,7 +182,7 @@ def silhouette_samples(X, labels, *, metric='euclidean', **kwds):
     labels : array, shape = [n_samples]
              label values for each sample
 
-    metric : string, or callable
+    metric : string, or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 2424c84394e2b..3406e0c316459 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -82,24 +82,24 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None,
 
     Y : {array-like, sparse matrix}, shape (n_samples_b, n_features)
 
-    precomputed : bool
+    precomputed : bool, default=False
         True if X is to be treated as precomputed distances to the samples in
         Y.
 
-    dtype : string, type, list of types or None (default=None)
+    dtype : string, type, list of types or None, default=None
         Data type required for X and Y. If None, the dtype will be an
         appropriate float type selected by _return_float_dtype.
 
         .. versionadded:: 0.18
 
-    accept_sparse : string, boolean or list/tuple of strings
+    accept_sparse : string, boolean or list/tuple of strings, default='csr'
         String[s] representing allowed sparse matrix formats, such as 'csc',
         'csr', etc. If the input is sparse but not in the allowed format,
         it will be converted to the first listed format. True allows the input
         to be any format. False means that a sparse matrix input will
         raise an error.
 
-    force_all_finite : boolean or 'allow-nan', (default=True)
+    force_all_finite : boolean or 'allow-nan', default=True
         Whether to raise an error on np.inf, np.nan, pd.NA in array. The
         possibilities are:
 
@@ -114,7 +114,7 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None,
         .. versionchanged:: 0.23
            Accepts `pd.NA` and converts it into `np.nan`
 
-    copy : bool
+    copy : bool, default=False
         Whether a forced copy will be triggered. If copy=False, a copy might
         be triggered by a conversion.
 
@@ -226,15 +226,15 @@ def euclidean_distances(X, Y=None, *, Y_norm_squared=None, squared=False,
 
     Y : {array-like, sparse matrix}, shape (n_samples_2, n_features)
 
-    Y_norm_squared : array-like, shape (n_samples_2, ), optional
+    Y_norm_squared : array-like, shape (n_samples_2, ), default=None
         Pre-computed dot-products of vectors in Y (e.g.,
         ``(Y**2).sum(axis=1)``)
         May be ignored in some cases, see the note below.
 
-    squared : boolean, optional
+    squared : boolean, default=False
         Return squared Euclidean distances.
 
-    X_norm_squared : array-like of shape (n_samples,), optional
+    X_norm_squared : array-like of shape (n_samples,), default=None
         Pre-computed dot-products of vectors in X (e.g.,
         ``(X**2).sum(axis=1)``)
         May be ignored in some cases, see the note below.
@@ -534,10 +534,10 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean",
     Y : {array-like, sparse matrix}, shape (n_samples2, n_features)
         Arrays containing points.
 
-    axis : int, optional, default 1
+    axis : int, default=1
         Axis along which the argmin and distances are to be computed.
 
-    metric : string or callable, default 'euclidean'
+    metric : string or callable, default='euclidean'
         metric to use for distance computation. Any metric from scikit-learn
         or scipy.spatial.distance can be used.
 
@@ -563,7 +563,7 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean",
         See the documentation for scipy.spatial.distance for details on these
         metrics.
 
-    metric_kwargs : dict, optional
+    metric_kwargs : dict, default=None
         Keyword arguments to pass to specified metric function.
 
     Returns
@@ -623,10 +623,10 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean",
         Arrays containing points. Respective shapes (n_samples1, n_features)
         and (n_samples2, n_features)
 
-    axis : int, optional, default 1
+    axis : int, default=1
         Axis along which the argmin and distances are to be computed.
 
-    metric : string or callable
+    metric : string or callable, default="euclidean"
         metric to use for distance computation. Any metric from scikit-learn
         or scipy.spatial.distance can be used.
 
@@ -652,7 +652,7 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean",
         See the documentation for scipy.spatial.distance for details on these
         metrics.
 
-    metric_kwargs : dict
+    metric_kwargs : dict, default=None
         keyword arguments to pass to specified metric function.
 
     Returns
@@ -688,7 +688,7 @@ def haversine_distances(X, Y=None):
     ----------
     X : array_like, shape (n_samples_1, 2)
 
-    Y : array_like, shape (n_samples_2, 2), optional
+    Y : array_like, shape (n_samples_2, 2), default=None
 
     Returns
     -------
@@ -814,8 +814,8 @@ def cosine_distances(X, Y=None):
     X : array_like, sparse matrix
         with shape (n_samples_X, n_features).
 
-    Y : array_like, sparse matrix (optional)
-        with shape (n_samples_Y, n_features).
+    Y : array_like, sparse matrix, default=None
+        with shape (n_samples_Y, n_features).
 
     Returns
     -------
@@ -935,7 +935,7 @@ def paired_distances(X, Y, *, metric="euclidean", **kwds):
     Y : ndarray (n_samples, n_features)
         Array 2 for distance computation.
 
-    metric : string or callable
+    metric : string or callable, default="euclidean"
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         specified in PAIRED_DISTANCES, including "euclidean",
@@ -987,9 +987,9 @@ def linear_kernel(X, Y=None, dense_output=True):
     ----------
     X : array of shape (n_samples_1, n_features)
 
-    Y : array of shape (n_samples_2, n_features)
+    Y : array of shape (n_samples_2, n_features), default=None
 
-    dense_output : boolean (optional), default True
+    dense_output : boolean, default=True
         Whether to return dense output even when the input is sparse. If
         ``False``, the output is sparse if both input arrays are sparse.
 
@@ -1015,14 +1015,14 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1):
     ----------
     X : ndarray of shape (n_samples_1, n_features)
 
-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : ndarray of shape (n_samples_2, n_features), default=None
 
-    degree : int, default 3
+    degree : int, default=3
 
-    gamma : float, default None
+    gamma : float, default=None
         if None, defaults to 1.0 / n_features
 
-    coef0 : float, default 1
+    coef0 : float, default=1
 
     Returns
     -------
@@ -1051,12 +1051,12 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1):
     ----------
     X : ndarray of shape (n_samples_1, n_features)
 
-    Y : ndarray of shape (n_samples_2, n_features)
+    Y : ndarray of shape (n_samples_2, n_features), default=None
 
-    gamma : float, default None
+    gamma : float, default=None
         If None, defaults to 1.0 / n_features
 
-    coef0 : float, default 1
+    coef0 : float, default=1
 
     Returns
     -------
@@ -1087,9 +1087,9 @@ def rbf_kernel(X, Y=None, gamma=None):
     ----------
     X : array of shape (n_samples_X, n_features)
 
-    Y : array of shape (n_samples_Y, n_features)
+    Y : array of shape (n_samples_Y, n_features), default=None
 
-    gamma : float, default None
+    gamma : float, default=None
         If None, defaults to 1.0 / n_features
 
     Returns
@@ -1122,9 +1122,9 @@ def laplacian_kernel(X, Y=None, gamma=None):
     ----------
     X : array of shape (n_samples_X, n_features)
 
-    Y : array of shape (n_samples_Y, n_features)
+    Y : array of shape (n_samples_Y, n_features), default=None
 
-    gamma : float, default None
+    gamma : float, default=None
         If None, defaults to 1.0 / n_features
 
     Returns
@@ -1157,11 +1157,12 @@ def cosine_similarity(X, Y=None, dense_output=True):
     X : ndarray or sparse array, shape: (n_samples_X, n_features)
         Input data.
 
-    Y : ndarray or sparse array, shape: (n_samples_Y, n_features)
+    Y : ndarray or sparse array, shape: (n_samples_Y, n_features), \
+            default=None
         Input data. If ``None``, the output will be the pairwise
         similarities between all samples in ``X``.
 
-    dense_output : boolean (optional), default True
+    dense_output : boolean, default=True
         Whether to return dense output even when the input is sparse. If
         ``False``, the output is sparse if both input arrays are sparse.
 
@@ -1214,7 +1215,7 @@ def additive_chi2_kernel(X, Y=None):
     ----------
     X : array-like of shape (n_samples_X, n_features)
 
-    Y : array of shape (n_samples_Y, n_features)
+    Y : array of shape (n_samples_Y, n_features), default=None
 
     Returns
     -------
@@ -1269,7 +1270,7 @@ def chi2_kernel(X, Y=None, gamma=1.):
     ----------
     X : array-like of shape (n_samples_X, n_features)
 
-    Y : array of shape (n_samples_Y, n_features)
+    Y : array of shape (n_samples_Y, n_features), default=None
 
     gamma : float, default=1.
         Scaling parameter of the chi2 kernel.
@@ -1479,11 +1480,11 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         [n_samples_a, n_features] otherwise
         Array of pairwise distances between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features], optional
+    Y : array [n_samples_b, n_features], default=None
         An optional second feature array. Only allowed if
         metric != "precomputed".
 
-    reduce_func : callable, optional
+    reduce_func : callable, default=None
         The function which is applied on each chunk of the distance matrix,
         reducing it to needed values.  ``reduce_func(D_chunk, start)``
         is called repeatedly, where ``D_chunk`` is a contiguous vertical
@@ -1495,7 +1496,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         If None, pairwise_distances_chunked returns a generator of vertical
         chunks of the distance matrix.
 
-    metric : string, or callable
+    metric : string, or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -1506,7 +1507,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         should take two arrays from X as input and return a value indicating
         the distance between them.
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int or None, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.
@@ -1515,7 +1516,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
-    working_memory : int, optional
+    working_memory : int, default=None
         The sought maximum memory for temporary distance matrix chunks.
         When None (default), the value of
         ``sklearn.get_config()['working_memory']`` is used.
@@ -1669,11 +1670,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
              [n_samples_a, n_features] otherwise
         Array of pairwise distances between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features], optional
+    Y : array [n_samples_b, n_features], default=None
         An optional second feature array. Only allowed if
         metric != "precomputed".
 
-    metric : string, or callable
+    metric : string, or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -1684,7 +1685,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
         should take two arrays from X as input and return a value indicating
         the distance between them.
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int or None, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.
@@ -1693,7 +1694,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
-    force_all_finite : boolean or 'allow-nan', (default=True)
+    force_all_finite : boolean or 'allow-nan', default=True
         Whether to raise an error on np.inf, np.nan, pd.NA in array. The
         possibilities are:
 
@@ -1871,10 +1872,10 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
              [n_samples_a, n_features] otherwise
         Array of pairwise kernels between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features]
+    Y : array [n_samples_b, n_features], default=None
         A second feature array only if X has shape [n_samples_a, n_features].
 
-    metric : string, or callable
+    metric : string, or callable, default="linear"
         The metric to use when calculating kernel between instances in a
         feature array. If metric is a string, it must be one of the metrics
         in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
@@ -1887,10 +1888,10 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
         matrices, not single samples. Use the string identifying the kernel
         instead.
 
-    filter_params : boolean
+    filter_params : boolean, default=False
         Whether to filter invalid parameters or not.
 
-    n_jobs : int or None, optional (default=None)
+    n_jobs : int or None, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.

From cc09f9d914753339c833a2da4b111f70deb6ee35 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Sun, 7 Jun 2020 00:00:47 -0400
Subject: [PATCH 03/42] Fix lint error

---
 sklearn/metrics/_regression.py | 4 ++--
 sklearn/metrics/pairwise.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 0168f49fdf476..6c225fc39f918 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -534,8 +534,8 @@ def r2_score(y_true, y_pred, *, sample_weight=None,
         Sample weights.
 
     multioutput : string in ['raw_values', 'uniform_average', \
-                'variance_weighted'] or None or array-like of shape (n_outputs), \
-                default='uniform_average'
+                'variance_weighted'] or None or array-like of \
+                shape (n_outputs), default='uniform_average'
 
         Defines aggregating of multiple output scores.
         Array-like value defines weights used to average scores.
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 3406e0c316459..8b3a25c4f9b60 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -814,7 +814,7 @@ def cosine_distances(X, Y=None):
     X : array_like, sparse matrix
         with shape (n_samples_X, n_features).
 
-    Y : array_like, sparse matrix with shape 
+    Y : array_like, sparse matrix with shape
         (n_samples_Y, n_features), default=None.
 
     Returns

From c8cd4249098a5a4ce66751ffeb31ecb9f39d2e96 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Sun, 7 Jun 2020 00:25:48 -0400
Subject: [PATCH 04/42] Resolve merge conflicts

---
 sklearn/metrics/pairwise.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index b1c72b9df494d..8506134227045 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -688,11 +688,7 @@ def haversine_distances(X, Y=None):
     ----------
     X : array-like, shape (n_samples_1, 2)
 
-<<<<<<< HEAD
     Y : array_like, shape (n_samples_2, 2), default=None
-=======
-    Y : array-like, shape (n_samples_2, 2), optional
->>>>>>> upstream/master
 
     Returns
     -------

From da935acbda6e6474f13ee788eb3e5ef11e388d0c Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Sun, 7 Jun 2020 00:33:32 -0400
Subject: [PATCH 05/42] Fix flake8 error

---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 8506134227045..5d72dd5006d72 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -811,7 +811,7 @@ def cosine_distances(X, Y=None):
 
     Parameters
     ----------
-    X : array-like, sparse matrix  
+    X : array-like, sparse matrix
         with shape (n_samples_X, n_features).
 
     Y : array_like, sparse matrix with shape \

From bede6d0c9d3ba3ed920140f7838f7dfd51f5c9b0 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:24:00 -0400
Subject: [PATCH 06/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index c3d98ee51a439..1fd32ee92e34d 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1330,8 +1330,8 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None,
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, ['binary', 'micro', 'macro', 'samples','weighted', \
-            default=None]
+    average : {'binary', 'micro', 'macro', 'samples', 'weighted'}, \
+            default=None
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
 

From 7b4647e083262dafd956e19e76c81290672816ed Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:24:15 -0400
Subject: [PATCH 07/42] Update sklearn/metrics/_classification.py

Co-authored-by: Adrin Jalali <adrin.jalali@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 1fd32ee92e34d..3690d29d439be 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -568,7 +568,7 @@ class labels [2]_.
         Labels assigned by the second annotator. The kappa statistic is
         symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.
 
-    labels : array, shape = [n_classes], default=None
+    labels : array-like of shape (n_classes,), default=None
         List of labels to index the matrix. This may be used to select a
         subset of labels. If None, all labels that appear at least once in
         ``y1`` or ``y2`` are used.

From cfe8b2b8ccc6f58199fb7f7923e484988d772b78 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:24:27 -0400
Subject: [PATCH 08/42] Update sklearn/metrics/_base.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py
index 5f09dc0f0dd27..21d0ab38f6a91 100644
--- a/sklearn/metrics/_base.py
+++ b/sklearn/metrics/_base.py
@@ -156,7 +156,7 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
         Target scores corresponding to probability estimates of a sample
         belonging to a particular class
 
-    average : 'macro' or 'weighted', default='macro'
+    average : {'macro', 'weighted'}, default='macro'
         Determines the type of averaging performed on the pairwise binary
         metric scores
         ``'macro'``:

From 1ec16e835b745f2e7961c58221b2a69503e88bad Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:24:48 -0400
Subject: [PATCH 09/42] Update sklearn/metrics/_classification.py

Co-authored-by: Adrin Jalali <adrin.jalali@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 3690d29d439be..84326f3577262 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -656,8 +656,8 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'micro', 'macro', 'samples', 'weighted', \
-            default = 'binary']
+    average : {None, 'micro', 'macro', 'samples', 'weighted', \
+            'binary'}, default='binary'
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
 

From 5e52205c587f0d33bfcb50912afbbfce95159635 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:26:43 -0400
Subject: [PATCH 10/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 84326f3577262..9d993491e3ab7 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -573,7 +573,7 @@ class labels [2]_.
         subset of labels. If None, all labels that appear at least once in
         ``y1`` or ``y2`` are used.
 
-    weights : str, default=None
+    weights : {'linear', 'quadratic'}, default=None
         Weighting type to calculate the score. None means no weighted;
         "linear" means linear weighted; "quadratic" means quadratic weighted.
 

From 8df195e2a803cc6990cd849aaf5e571000fc251e Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:26:53 -0400
Subject: [PATCH 11/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 9d993491e3ab7..7a1191a7183ce 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Predicted labels, as returned by a classifier.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From c93effdb7e147a8af0b8d88a402639f6807ef66f Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:27:03 -0400
Subject: [PATCH 12/42] Update sklearn/metrics/pairwise.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 5d72dd5006d72..f069ec35e72a3 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1695,7 +1695,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
         for more details.
 
-    force_all_finite : boolean or 'allow-nan', default=True
+    force_all_finite : bool or 'allow-nan', default=True
         Whether to raise an error on np.inf, np.nan, pd.NA in array. The
         possibilities are:
 

From 57ca202167ef44c127f41896c69a0b64679c2fb1 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:27:15 -0400
Subject: [PATCH 13/42] Update sklearn/metrics/pairwise.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index f069ec35e72a3..3a85781568a0e 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1876,7 +1876,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
     Y : array [n_samples_b, n_features], default=None
         A second feature array only if X has shape [n_samples_a, n_features].
 
-    metric : string, or callable, default="linear"
+    metric : str or callable, default="linear"
         The metric to use when calculating kernel between instances in a
         feature array. If metric is a string, it must be one of the metrics
         in pairwise.PAIRWISE_KERNEL_FUNCTIONS.

From 393759291a50ec2f21888d72b39d5b27893b309a Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:27:32 -0400
Subject: [PATCH 14/42] Update sklearn/metrics/pairwise.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 3a85781568a0e..64585723dfd16 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1892,7 +1892,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
     filter_params : boolean, default=False
         Whether to filter invalid parameters or not.
 
-    n_jobs : int or None, default=None
+    n_jobs : int, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.

From a069d00329df1821814f010ba1c02e8899f14f1e Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:27:46 -0400
Subject: [PATCH 15/42] Update sklearn/metrics/pairwise.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index 64585723dfd16..a96e960c66835 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1889,7 +1889,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
         matrices, not single samples. Use the string identifying the kernel
         instead.
 
-    filter_params : boolean, default=False
+    filter_params : bool, default=False
         Whether to filter invalid parameters or not.
 
     n_jobs : int, default=None

From a82ee380a2d448076d907ff3b21c6efacdb7f525 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:28:34 -0400
Subject: [PATCH 16/42] Update sklearn/metrics/pairwise.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/pairwise.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index a96e960c66835..f6a9e74c1da51 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -1873,7 +1873,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False,
              [n_samples_a, n_features] otherwise
         Array of pairwise kernels between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features], default=None
+    Y : array of shape (n_samples_b, n_features), default=None
         A second feature array only if X has shape [n_samples_a, n_features].
 
     metric : str or callable, default="linear"

From 572ca85ee3337652f7bfe87e5829d079dcf735d1 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:29:39 -0400
Subject: [PATCH 17/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 7a1191a7183ce..41efd31d7a615 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -948,7 +948,7 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From 89af3719450d986cc9f23a2f002d28906807cdce Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:29:53 -0400
Subject: [PATCH 18/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 41efd31d7a615..9a3763fb57ef7 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -966,8 +966,8 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'micro', 'macro', 'samples','weighted', \
-            default='binary']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:

From 601c75723d1037824a836473e7eb283d5efad455 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:35:56 -0400
Subject: [PATCH 19/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 9a3763fb57ef7..70b0994ae40c3 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1073,7 +1073,7 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1,
     beta : float
         Determines the weight of recall in the combined score.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From 4ed383019ef8638eb73d5579f81abeaca12258b2 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:39:37 -0400
Subject: [PATCH 20/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 70b0994ae40c3..b3da0477dfb02 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1091,8 +1091,8 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1,
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'micro', 'macro', 'samples', 'weighted', \
-            default='binary']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:

From 1fa4425779ea26b0bb3b7310557560aea1a06da1 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 16:56:38 -0400
Subject: [PATCH 21/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index b3da0477dfb02..71cf73b3c872c 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1315,7 +1315,7 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None,
     beta : float, default=1.0
         The strength of recall versus precision in the F-score.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From 48a5908594a2c73ceeb35c089b38fbc36ebf441d Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 17:02:15 -0400
Subject: [PATCH 22/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 71cf73b3c872c..1c8e7be68e42a 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1545,8 +1545,8 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1,
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'micro', 'macro', 'samples', 'weighted', \
-            default='binary']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:

From 881f73387c524ae07b9a77374feba4f149df4f1d Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 17:02:31 -0400
Subject: [PATCH 23/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 1c8e7be68e42a..06d9bca37c25a 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1527,7 +1527,7 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From c85c836279e23d7f122035636648b45d8a90bce1 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 17:41:44 -0400
Subject: [PATCH 24/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 06d9bca37c25a..ccb263787c60f 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1645,7 +1645,7 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : list, default=None
+    labels : array-like, default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From 9286fcb7ab6980a88b935040a2f30295d7c15df2 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 17:41:58 -0400
Subject: [PATCH 25/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index ccb263787c60f..20050374910f9 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1663,8 +1663,8 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary',
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : string, [None, 'micro', 'macro', 'samples', 'weighted', \
-            default='binary']
+    average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \
+            default='binary'
         This parameter is required for multiclass/multilabel targets.
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:

From 8367323a63c57046a6277db087170521ca14cce5 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 18:50:20 -0400
Subject: [PATCH 26/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index aef839fafeae2..3b711709bb359 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -1207,7 +1207,7 @@ def dcg_score(y_true, y_score, *, k=None,
         Base of the logarithm used for the discount. A low value means a
         sharper discount (top results are more important).
 
-    sample_weight : ndarray, shape (n_samples,), default=None
+    sample_weight : ndarray of shape (n_samples,), default=None
         Sample weights. If None, all samples are given the same weight.
 
     ignore_ties : bool, default=False

From 056c8e9265977284f5c574ccd33b877dc7f32d24 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Tue, 9 Jun 2020 23:43:07 -0400
Subject: [PATCH 27/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 20050374910f9..5acfdb245095a 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1838,7 +1838,7 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Estimated targets as returned by a classifier.
 
-    labels : array, shape = [n_labels], default=None
+    labels : array-like of shape (n_labels,), default=None
         Optional list of label indices to include in the report.
 
     target_names : list of strings, default=None

From 036b8b69f6766fbafa816cce5f90dd51af091cac Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Wed, 10 Jun 2020 17:25:39 -0400
Subject: [PATCH 28/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 5acfdb245095a..f7eed56d81ab7 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2269,7 +2269,7 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     pred_decision : array, shape = [n_samples] or [n_samples, n_classes]
         Predicted decisions, as output by decision_function (floats).
 
-    labels : array, default=None
+    labels : array-like, default=None
         Contains all the labels for the problem. Used in multiclass hinge loss.
 
     sample_weight : array-like of shape (n_samples,), default=None

From 1a83796199710e285be97db351a50d025052fd3f Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Wed, 10 Jun 2020 17:25:53 -0400
Subject: [PATCH 29/42] Update sklearn/metrics/_classification.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index f7eed56d81ab7..d6070b38dfec5 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -1841,7 +1841,7 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None,
     labels : array-like of shape (n_labels,), default=None
         Optional list of label indices to include in the report.
 
-    target_names : list of strings, default=None
+    target_names : list of str of shape (n_labels,), default=None
         Optional display names matching the labels (same order).
 
     sample_weight : array-like of shape (n_samples,), default=None

From a3d0e1a6a81da8fe68fb95def81cdaa69a251dd2 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Wed, 10 Jun 2020 17:57:41 -0400
Subject: [PATCH 30/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 3b711709bb359..869cdc132cc6d 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -135,7 +135,8 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1,
         class, confidence values, or non-thresholded measure of decisions
         (as returned by "decision_function" on some classifiers).
 
-    average : string, [None, 'micro', 'samples', 'weighted', default="macro"]
+    average : {'micro', 'samples', 'weighted', 'macro'} or None, \
+            default='macro'
         If ``None``, the scores for each class are returned. Otherwise,
         this determines the type of averaging performed on the data:
 

From 8573644c89230aa7312985d2d4da6bcefe3f4ba1 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 00:01:58 -0400
Subject: [PATCH 31/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 869cdc132cc6d..69769cbadbc7d 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -412,7 +412,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
         Target scores corresponding to probability estimates of a sample
         belonging to a particular class
 
-    labels : array, shape = [n_classes] or None
+    labels : array-like of shape (n_classes,), default=None
         List of labels to index ``y_score`` used for multiclass. If ``None``,
         the lexical order of ``y_true`` is used to index ``y_score``.
 

From c291010886c8704b2b41ae36e8341caad4534e37 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 00:02:32 -0400
Subject: [PATCH 32/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 69769cbadbc7d..9156df1f8fc9b 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -1361,7 +1361,7 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None,
         Only consider the highest k scores in the ranking. If None, use all
         outputs.
 
-    sample_weight : ndarray, shape (n_samples,),default=None
+    sample_weight : ndarray of shape (n_samples,), default=None
         Sample weights. If None, all samples are given the same weight.
 
     ignore_ties : bool, default=False

From 40dc8b008c3d222047558b812cda501f94900a0d Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 20:52:11 -0400
Subject: [PATCH 33/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 9156df1f8fc9b..3d7628d69596a 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -425,7 +425,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
             Calculate metrics for the multiclass case using the one-vs-one
             approach.
 
-    average : 'macro' or 'weighted'
+    average : {'macro', 'weighted'}, default='macro'
         Determines the type of averaging performed on the pairwise binary
         metric scores
         ``'macro'``:

From 769e6fb44f62cb4eaf7fdaadf1e32097e5c01ba0 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 20:52:31 -0400
Subject: [PATCH 34/42] Update sklearn/metrics/_ranking.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 3d7628d69596a..7e2c9dfd552b0 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -436,7 +436,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels,
             Calculate metrics for each label, taking into account the
             prevalence of the classes.
 
-    sample_weight : array-like of shape (n_samples,)
+    sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
     """

From 0848721eafe2ab8f84e799644a86850a3278ed32 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 20:57:07 -0400
Subject: [PATCH 35/42] Update sklearn/metrics/_regression.py

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_regression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 6c225fc39f918..e87106eb9320f 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -139,7 +139,7 @@ def mean_absolute_error(y_true, y_pred, *,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    multioutput : string in ['raw_values', 'uniform_average'] \
+    multioutput : {'raw_values', 'uniform_average'} \
                 or array-like of shape (n_outputs), default='uniform_average'
         Defines aggregating of multiple output values.
         Array-like value defines weights used to average errors.

From 37963a2a1ad56fb972f24516b417557fa6b2205d Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 21:01:30 -0400
Subject: [PATCH 36/42] Apply suggestions from code review

Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/metrics/_regression.py           |  4 +--
 sklearn/metrics/_scorer.py               | 10 +++---
 sklearn/metrics/cluster/_bicluster.py    |  2 +-
 sklearn/metrics/cluster/_supervised.py   | 12 +++----
 sklearn/metrics/cluster/_unsupervised.py |  8 ++---
 sklearn/metrics/pairwise.py              | 42 ++++++++++++------------
 6 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index e87106eb9320f..685297af80cb7 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -292,7 +292,7 @@ def mean_squared_log_error(y_true, y_pred, *,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    multioutput : string in ['raw_values', 'uniform_average'] \
+    multioutput : {'raw_values', 'uniform_average'} \
             or array-like of shape (n_outputs), default='uniform_average'
 
         Defines aggregating of multiple output values.
@@ -533,7 +533,7 @@ def r2_score(y_true, y_pred, *, sample_weight=None,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    multioutput : string in ['raw_values', 'uniform_average', \
+    multioutput : {'raw_values', 'uniform_average', 'variance_weigthed'}, None or \
                 'variance_weighted'] or None or array-like of \
                 shape (n_outputs), default='uniform_average'
 
diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py
index 2508529b1fc39..bed3fb496f72a 100644
--- a/sklearn/metrics/_scorer.py
+++ b/sklearn/metrics/_scorer.py
@@ -150,7 +150,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like, default=None
+        sample_weight : array-like of shape (n_samples,), default=None
             Sample weights.
 
         Returns
@@ -186,7 +186,7 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
         y_true : array-like
             Gold standard target values for X.
 
-        sample_weight : array-like, default=None
+        sample_weight : array-like of shape (n_samples,), default=None
             Sample weights.
 
         Returns
@@ -371,12 +371,12 @@ def check_scoring(estimator, scoring=None, *, allow_none=False):
     estimator : estimator object implementing 'fit'
         The object to use to fit the data.
 
-    scoring : string, callable or None, optional, default=None
+    scoring : str or callable, default=None
         A string (see model evaluation documentation) or
         a scorer callable object / function with signature
         ``scorer(estimator, X, y)``.
 
-    allow_none : boolean, default=False
+    allow_none : bool, default=False
         If no scoring is specified and the estimator has no score function, we
         can either return None or raise an exception.
 
@@ -431,7 +431,7 @@ def _check_multimetric_scoring(estimator, scoring=None):
     estimator : sklearn estimator instance
         The estimator for which the scoring will be applied.
 
-    scoring : string, callable, list/tuple, dict or None, default=None
+    scoring : str, callable, list, tuple or dict, default=None
         A single string (see :ref:`scoring_parameter`) or a callable
         (see :ref:`scoring`) to evaluate the predictions on the test set.
 
diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py
index 8f967cee6264e..e267b44cee229 100644
--- a/sklearn/metrics/cluster/_bicluster.py
+++ b/sklearn/metrics/cluster/_bicluster.py
@@ -64,7 +64,7 @@ def consensus_score(a, b, *, similarity="jaccard"):
     b : (rows, columns)
         Another set of biclusters like ``a``.
 
-    similarity : string or function, default=jaccard
+    similarity : 'jaccard' or callable, default='jaccard'
         May be the string "jaccard" to use the Jaccard coefficient, or
         any function that takes four arguments, each of which is a 1d
         indicator vector: (a_rows, a_columns, b_rows, b_columns).
diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index b561d0e5015a4..7f84001f98dec 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -90,12 +90,12 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False):
     labels_pred : array-like of shape (n_samples,)
         Cluster labels to evaluate
 
-    eps : None or float, default=None
+    eps : float, default=None
         If a float, that value is added to all values in the contingency
         matrix. This helps to stop NaN propagation.
         If ``None``, nothing is adjusted.
 
-    sparse : boolean, default=False
+    sparse : bool, default=False
         If True, return a sparse CSR continency matrix. If ``eps is not None``,
         and ``sparse is True``, will throw ValueError.
 
@@ -601,8 +601,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-    contingency : {None, array, sparse matrix}, \
-                  shape = [n_classes_true, n_classes_pred], \
+    contingency : {ndarray, sparse matrix} of \
+                  shape (n_classes_true, n_classes_pred), \
                   default=None
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
@@ -690,7 +690,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, *,
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-    average_method : string, default='arithmetic'
+    average_method : str, default='arithmetic'
         How to compute the normalizer in the denominator. Possible options
         are 'min', 'geometric', 'arithmetic', and 'max'.
 
@@ -810,7 +810,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, *,
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-    average_method : string, default='arithmetic'
+    average_method : str, default='arithmetic'
         How to compute the normalizer in the denominator. Possible options
         are 'min', 'geometric', 'arithmetic', and 'max'.
 
diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py
index 13ed169e69169..d0662967816b9 100644
--- a/sklearn/metrics/cluster/_unsupervised.py
+++ b/sklearn/metrics/cluster/_unsupervised.py
@@ -66,19 +66,19 @@ def silhouette_score(X, labels, *, metric='euclidean', sample_size=None,
     labels : array, shape = [n_samples]
          Predicted labels for each sample.
 
-    metric : string, or callable, default='euclidean'
+    metric : str or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by :func:`metrics.pairwise.pairwise_distances
         <sklearn.metrics.pairwise.pairwise_distances>`. If X is the distance
         array itself, use ``metric="precomputed"``.
 
-    sample_size : int or None, default=None
+    sample_size : int, default=None
         The size of the sample to use when computing the Silhouette Coefficient
         on a random subset of the data.
         If ``sample_size is None``, no sampling is used.
 
-    random_state : int, RandomState instance or None, default=None
+    random_state : int or RandomState instance, default=None
         Determines random number generation for selecting a subset of samples.
         Used when ``sample_size is not None``.
         Pass an int for reproducible results across multiple function calls.
@@ -182,7 +182,7 @@ def silhouette_samples(X, labels, *, metric='euclidean', **kwds):
     labels : array, shape = [n_samples]
              label values for each sample
 
-    metric : string, or callable, default='euclidean'
+    metric : str or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is
diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py
index f6a9e74c1da51..8adf46f5564d6 100644
--- a/sklearn/metrics/pairwise.py
+++ b/sklearn/metrics/pairwise.py
@@ -86,20 +86,20 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None,
         True if X is to be treated as precomputed distances to the samples in
         Y.
 
-    dtype : string, type, list of types or None, default=None
+    dtype : str, type, list of types, default=None
         Data type required for X and Y. If None, the dtype will be an
         appropriate float type selected by _return_float_dtype.
 
         .. versionadded:: 0.18
 
-    accept_sparse : string, boolean or list/tuple of strings, default='csr'
+    accept_sparse : str, bool or list/tuple of str, default='csr'
         String[s] representing allowed sparse matrix formats, such as 'csc',
         'csr', etc. If the input is sparse but not in the allowed format,
         it will be converted to the first listed format. True allows the input
         to be any format. False means that a sparse matrix input will
         raise an error.
 
-    force_all_finite : boolean or 'allow-nan', default=True
+    force_all_finite : bool or 'allow-nan', default=True
         Whether to raise an error on np.inf, np.nan, pd.NA in array. The
         possibilities are:
 
@@ -226,12 +226,12 @@ def euclidean_distances(X, Y=None, *, Y_norm_squared=None, squared=False,
 
     Y : {array-like, sparse matrix}, shape (n_samples_2, n_features)
 
-    Y_norm_squared : array-like, shape (n_samples_2, ), default=None
+    Y_norm_squared : array-like of shape (n_samples_2,), default=None
         Pre-computed dot-products of vectors in Y (e.g.,
         ``(Y**2).sum(axis=1)``)
         May be ignored in some cases, see the note below.
 
-    squared : boolean, default=False
+    squared : bool, default=False
         Return squared Euclidean distances.
 
     X_norm_squared : array-like of shape (n_samples,), default=None
@@ -534,10 +534,10 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean",
     Y : {array-like, sparse matrix}, shape (n_samples2, n_features)
         Arrays containing points.
 
-    axis : int, optional, default=1
+    axis : int, default=1
         Axis along which the argmin and distances are to be computed.
 
-    metric : string or callable, default='euclidean'
+    metric : str or callable, default='euclidean'
         metric to use for distance computation. Any metric from scikit-learn
         or scipy.spatial.distance can be used.
 
@@ -623,10 +623,10 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean",
         Arrays containing points. Respective shapes (n_samples1, n_features)
         and (n_samples2, n_features)
 
-    axis : int, optional, default=1
+    axis : int, default=1
         Axis along which the argmin and distances are to be computed.
 
-    metric : string or callable, default="euclidean"
+    metric : str or callable, default="euclidean"
         metric to use for distance computation. Any metric from scikit-learn
         or scipy.spatial.distance can be used.
 
@@ -688,7 +688,7 @@ def haversine_distances(X, Y=None):
     ----------
     X : array-like, shape (n_samples_1, 2)
 
-    Y : array_like, shape (n_samples_2, 2), default=None
+    Y : array-like of shape (n_samples_2, 2), default=None
 
     Returns
     -------
@@ -814,8 +814,8 @@ def cosine_distances(X, Y=None):
     X : array-like, sparse matrix
         with shape (n_samples_X, n_features).
 
-    Y : array_like, sparse matrix with shape \
-        (n_samples_Y, n_features), default=None.
+    Y : {array-like, sparse matrix} of shape \
+        (n_samples_Y, n_features), default=None
 
 
     Returns
@@ -990,7 +990,7 @@ def linear_kernel(X, Y=None, dense_output=True):
 
     Y : array of shape (n_samples_2, n_features), default=None
 
-    dense_output : boolean (optional), default=True
+    dense_output : bool, default=True
         Whether to return dense output even when the input is sparse. If
         ``False``, the output is sparse if both input arrays are sparse.
 
@@ -1158,12 +1158,12 @@ def cosine_similarity(X, Y=None, dense_output=True):
     X : ndarray or sparse array, shape: (n_samples_X, n_features)
         Input data.
 
-    Y : ndarray or sparse array, shape: (n_samples_Y, n_features), \
+    Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), \
             default=None
         Input data. If ``None``, the output will be the pairwise
         similarities between all samples in ``X``.
 
-    dense_output : boolean (optional), default=True
+    dense_output : bool, default=True
         Whether to return dense output even when the input is sparse. If
         ``False``, the output is sparse if both input arrays are sparse.
 
@@ -1481,7 +1481,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         [n_samples_a, n_features] otherwise
         Array of pairwise distances between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features], default=None
+    Y : array of shape (n_samples_b, n_features), default=None
         An optional second feature array. Only allowed if
         metric != "precomputed".
 
@@ -1497,7 +1497,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         If None, pairwise_distances_chunked returns a generator of vertical
         chunks of the distance matrix.
 
-    metric : string, or callable, default='euclidean'
+    metric : str or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -1508,7 +1508,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None,
         should take two arrays from X as input and return a value indicating
         the distance between them.
 
-    n_jobs : int or None, default=None
+    n_jobs : int, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.
@@ -1671,11 +1671,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
              [n_samples_a, n_features] otherwise
         Array of pairwise distances between samples, or a feature array.
 
-    Y : array [n_samples_b, n_features], default=None
+    Y : array of shape (n_samples_b, n_features), default=None
         An optional second feature array. Only allowed if
         metric != "precomputed".
 
-    metric : string, or callable, default='euclidean'
+    metric : str or callable, default='euclidean'
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
         allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -1686,7 +1686,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None,
         should take two arrays from X as input and return a value indicating
         the distance between them.
 
-    n_jobs : int or None, default=None
+    n_jobs : int, default=None
         The number of jobs to use for the computation. This works by breaking
         down the pairwise matrix into n_jobs even slices and computing them in
         parallel.

From dc5cc394b1bf7214409523e8c2d80e74fb5acfc5 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 21:06:28 -0400
Subject: [PATCH 37/42] Make edits

---
 sklearn/metrics/_classification.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index d6070b38dfec5..ea2d55229c4a8 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -370,7 +370,7 @@ def multilabel_confusion_matrix(y_true, y_pred, *, sample_weight=None,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights
 
-    labels : array-like, default=None
+    labels : array-like of shape (n_classes,), default=None
         A list of classes or column indices to select some (or to force
         inclusion of classes absent from the data)
 
@@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1,
     y_pred : 1d array-like, or label indicator array / sparse matrix
         Predicted labels, as returned by a classifier.
 
-    labels : array-like, default=None
+    labels : array-like of shape (n_classes,), default=None
         The set of labels to include when ``average != 'binary'``, and their
         order if ``average is None``. Labels present in the data can be
         excluded, for example to calculate a multiclass average ignoring a

From 711658bd473a23cc0923f9619f866cc27222a854 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 11 Jun 2020 21:13:29 -0400
Subject: [PATCH 38/42] Make edits

---
 sklearn/metrics/_regression.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py
index 685297af80cb7..72d7bc465fc3b 100644
--- a/sklearn/metrics/_regression.py
+++ b/sklearn/metrics/_regression.py
@@ -533,9 +533,9 @@ def r2_score(y_true, y_pred, *, sample_weight=None,
     sample_weight : array-like of shape (n_samples,), default=None
         Sample weights.
 
-    multioutput : {'raw_values', 'uniform_average', 'variance_weigthed'}, None or \
-                'variance_weighted'] or None or array-like of \
-                shape (n_outputs), default='uniform_average'
+    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} \
+                or None or array-like of shape (n_outputs,), \
+                default='uniform_average'
 
         Defines aggregating of multiple output scores.
         Array-like value defines weights used to average scores.

From 0da8b252e61916b55d9e602e1250190a14051039 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Fri, 11 Jun 2021 19:48:33 -0400
Subject: [PATCH 39/42] Fix another merge conflict

---
 sklearn/metrics/cluster/_supervised.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py
index b48e61ab4e70f..7814e7ba50e1c 100644
--- a/sklearn/metrics/cluster/_supervised.py
+++ b/sklearn/metrics/cluster/_supervised.py
@@ -101,13 +101,8 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False,
         If ``None``, nothing is adjusted.
 
     sparse : bool, default=False
-<<<<<<< HEAD
-        If True, return a sparse CSR continency matrix. If ``eps is not None``,
-        and ``sparse is True``, will throw ValueError.
-=======
         If `True`, return a sparse CSR continency matrix. If `eps` is not
         `None` and `sparse` is `True` will raise ValueError.
->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5
 
         .. versionadded:: 0.18
 
@@ -745,14 +740,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None):
     labels_pred : int array-like of shape (n_samples,)
         A clustering of the data into disjoint subsets.
 
-<<<<<<< HEAD
-    contingency : {ndarray, sparse matrix} of \
-                  shape (n_classes_true, n_classes_pred), \
-                  default=None
-=======
     contingency : {ndarray, sparse matrix} of shape \
             (n_classes_true, n_classes_pred), default=None
->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5
         A contingency matrix given by the :func:`contingency_matrix` function.
         If value is ``None``, it will be computed, otherwise the given value is
         used, with ``labels_true`` and ``labels_pred`` ignored.

From 0fa9b85af5e6a6a7bc0a9c83759982f65de425e9 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Thu, 1 Jul 2021 23:38:51 -0400
Subject: [PATCH 40/42] Accept incoming change from previous merge

---
 sklearn/metrics/cluster/_unsupervised.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py
index 7890b97f06a8a..fd4933c1df17a 100644
--- a/sklearn/metrics/cluster/_unsupervised.py
+++ b/sklearn/metrics/cluster/_unsupervised.py
@@ -79,11 +79,7 @@ def silhouette_score(
         on a random subset of the data.
         If ``sample_size is None``, no sampling is used.
 
-<<<<<<< HEAD
-    random_state : int or RandomState instance, default=None
-=======
     random_state : int, RandomState instance or None, default=None
->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5
         Determines random number generation for selecting a subset of samples.
         Used when ``sample_size is not None``.
         Pass an int for reproducible results across multiple function calls.

From e0ab80702aeeec0629b19c4fef73de1ac929203b Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Mon, 5 Jul 2021 22:12:33 -0400
Subject: [PATCH 41/42] Remove Lars from DOCSTRING_IGNORE_LIST.

---
 maint_tools/test_docstrings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 9b23b1789aeb4..abd1f3303c518 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -75,7 +75,6 @@
     "LabelEncoder",
     "LabelPropagation",
     "LabelSpreading",
-    "Lars",
     "LarsCV",
     "LassoCV",
     "LassoLars",

From 4da84c407729da7ab4d53773ea4663e4ff083fa8 Mon Sep 17 00:00:00 2001
From: genvalen <genvalen@protonmail.com>
Date: Mon, 5 Jul 2021 22:23:14 -0400
Subject: [PATCH 42/42] Ensure Lars passes numpydoc validation

---
 sklearn/linear_model/_least_angle.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py
index deec81a29c190..27b0fafd70ecd 100644
--- a/sklearn/linear_model/_least_angle.py
+++ b/sklearn/linear_model/_least_angle.py
@@ -842,7 +842,7 @@ def _lars_path_solver(
 
 
 class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
-    """Least Angle Regression model a.k.a. LAR
+    """Least Angle Regression model a.k.a. LAR.
 
     Read more in the :ref:`User Guide <least_angle_regression>`.
 
@@ -940,6 +940,13 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
 
         .. versionadded:: 0.24
 
+    See Also
+    --------
+    lars_path : Compute Least Angle Regression or Lasso
+        path using LARS algorithm.
+    LarsCV : Cross-validated Least Angle Regression model.
+    sklearn.decomposition.sparse_encode : Sparse coding.
+
     Examples
     --------
     >>> from sklearn import linear_model
@@ -948,12 +955,6 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel):
     Lars(n_nonzero_coefs=1, normalize=False)
     >>> print(reg.coef_)
     [ 0. -1.11...]
-
-    See Also
-    --------
-    lars_path, LarsCV
-    sklearn.decomposition.sparse_encode
-
     """
 
     method = "lar"
@@ -1094,7 +1095,7 @@ def fit(self, X, y, Xy=None):
         Returns
         -------
         self : object
-            returns an instance of self.
+            Returns an instance of self.
         """
         X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)