From 1a7f2fb0939b0386dc1da892df701de8a4cf91af Mon Sep 17 00:00:00 2001 From: genvalen Date: Sat, 6 Jun 2020 15:57:26 -0400 Subject: [PATCH 01/42] Update default doc string values --- sklearn/linear_model/_huber.py | 12 +++---- sklearn/linear_model/_least_angle.py | 4 +-- sklearn/linear_model/_omp.py | 36 +++++++++---------- sklearn/linear_model/_passive_aggressive.py | 39 +++++++++++---------- sklearn/linear_model/_ransac.py | 22 ++++++------ sklearn/linear_model/_theil_sen.py | 16 ++++----- 6 files changed, 65 insertions(+), 64 deletions(-) diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index 77e6ff944b78d..521d3f1233ff9 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ -142,29 +142,29 @@ class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator): Parameters ---------- - epsilon : float, greater than 1.0, default 1.35 + epsilon : float, greater than 1.0, default=1.35 The parameter epsilon controls the number of samples that should be classified as outliers. The smaller the epsilon, the more robust it is to outliers. - max_iter : int, default 100 + max_iter : int, default=100 Maximum number of iterations that ``scipy.optimize.minimize(method="L-BFGS-B")`` should run for. - alpha : float, default 0.0001 + alpha : float, default=0.0001 Regularization parameter. - warm_start : bool, default False + warm_start : bool, default=False This is useful if the stored attributes of a previously used model has to be reused. If set to False, then the coefficients will be rewritten for every call to fit. See :term:`the Glossary `. - fit_intercept : bool, default True + fit_intercept : bool, default=True Whether or not to fit the intercept. This can be set to False if the data is already centered around the origin. - tol : float, default 1e-5 + tol : float, default=1e-05 The iteration will stop when ``max{|proj g_i | i = 1, ..., n}`` <= ``tol`` where pg_i is the i-th component of the projected gradient. 
diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 255baacea9a59..6af700a6538ad 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -1028,7 +1028,7 @@ class LassoLars(Lars): max_iter : int, default=500 Maximum number of iterations to perform. - eps : float, optional + eps : float, default=np.finfo(np.float).eps The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Unlike the ``tol`` parameter in some iterative @@ -1060,7 +1060,7 @@ class LassoLars(Lars): `y` values, to satisfy the model's assumption of one-at-a-time computations. Might help with stability. - random_state : int, RandomState instance or None (default) + random_state : int, RandomState instance, default=None Determines random number generation for jittering. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. Ignored if `jitter` is None. diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index 44371e9fa76e7..0287d4ebc2d5c 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -289,27 +289,27 @@ def orthogonal_mp(X, y, *, n_nonzero_coefs=None, tol=None, precompute=False, y : array, shape (n_samples,) or (n_samples, n_targets) Input targets - n_nonzero_coefs : int + n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features. - tol : float + tol : float, default=None Maximum norm of the residual. If not None, overrides n_nonzero_coefs. - precompute : {True, False, 'auto'}, + precompute : {True, False, 'auto'}, default=False Whether to perform precomputations. Improves performance when n_targets or n_samples is very large. - copy_X : bool, optional + copy_X : bool, default=True Whether the design matrix X must be copied by the algorithm. 
A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. - return_path : bool, optional. Default: False + return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. - return_n_iter : bool, optional default False + return_n_iter : bool, default=False Whether or not to return the number of iterations. Returns @@ -427,30 +427,30 @@ def orthogonal_mp_gram(Gram, Xy, *, n_nonzero_coefs=None, tol=None, Xy : array, shape (n_features,) or (n_features, n_targets) Input targets multiplied by X: X.T * y - n_nonzero_coefs : int + n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features. - tol : float + tol : float, default=None Maximum norm of the residual. If not None, overrides n_nonzero_coefs. - norms_squared : array-like, shape (n_targets,) + norms_squared : array-like, shape (n_targets,), default=None Squared L2 norms of the lines of y. Required if tol is not None. - copy_Gram : bool, optional + copy_Gram : bool, default=True Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. - copy_Xy : bool, optional + copy_Xy : bool, default=True Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten. - return_path : bool, optional. Default: False + return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. - return_n_iter : bool, optional default False + return_n_iter : bool, default=False Whether or not to return the number of iterations. 
Returns @@ -550,19 +550,19 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel): Parameters ---------- - n_nonzero_coefs : int, optional + n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features. - tol : float, optional + tol : float, default=None Maximum norm of the residual. If not None, overrides n_nonzero_coefs. - fit_intercept : boolean, optional + fit_intercept : boolean, default=True whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). - normalize : boolean, optional, default True + normalize : boolean, default=True This parameter is ignored when ``fit_intercept`` is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. @@ -570,7 +570,7 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - precompute : {True, False, 'auto'}, default 'auto' + precompute : {True, False, 'auto'}, default='auto' Whether to use a precomputed Gram and Xy matrix to speed up calculations. Improves performance when :term:`n_targets` or :term:`n_samples` is very large. Note that if you already have such diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index 22c47fb1fcf07..55a8d1d0b2304 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -15,21 +15,21 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): Parameters ---------- - C : float + C : float, default=1.0 Maximum step size (regularization). Defaults to 1.0. - fit_intercept : bool, default=False + fit_intercept : bool, default=True Whether the intercept should be estimated or not. 
If False, the data is assumed to be already centered. - max_iter : int, optional (default=1000) + max_iter : int, default=1000 The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19 - tol : float or None, optional (default=1e-3) + tol : float or None, default=1e-3 The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). @@ -59,15 +59,15 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): shuffle : bool, default=True Whether or not the training data should be shuffled after each epoch. - verbose : integer, optional + verbose : integer, default=0 The verbosity level - loss : string, optional + loss : string, default="hinge" The loss function to be used: hinge: equivalent to PA-I in the reference paper. squared_hinge: equivalent to PA-II in the reference paper. - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. @@ -80,7 +80,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): function calls. See :term:`Glossary `. - warm_start : bool, optional + warm_start : bool, default=False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. @@ -89,7 +89,8 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): result in a different solution than when calling fit a single time because of the way the data is shuffled. - class_weight : dict, {class_label: weight} or "balanced" or None, optional + class_weight : dict, {class_label: weight} or "balanced" or None, \ + default=None Preset for the class_weight fit parameter. Weights associated with classes. 
If not given, all classes @@ -102,7 +103,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): .. versionadded:: 0.17 parameter *class_weight* to automatically weight samples. - average : bool or int, optional + average : bool or int, default=False When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches @@ -266,21 +267,21 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): Parameters ---------- - C : float + C : float, default=1.0 Maximum step size (regularization). Defaults to 1.0. - fit_intercept : bool + fit_intercept : bool, default=True Whether the intercept should be estimated or not. If False, the data is assumed to be already centered. Defaults to True. - max_iter : int, optional (default=1000) + max_iter : int, default=1000 The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19 - tol : float or None, optional (default=1e-3) + tol : float or None, default=1e-3 The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). @@ -310,16 +311,16 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): shuffle : bool, default=True Whether or not the training data should be shuffled after each epoch. - verbose : integer, optional + verbose : integer, default=0 The verbosity level - loss : string, optional + loss : string, default="epsilon_insensitive" The loss function to be used: epsilon_insensitive: equivalent to PA-I in the reference paper. squared_epsilon_insensitive: equivalent to PA-II in the reference paper. - epsilon : float + epsilon : float, default=DEFAULT_EPSILON If the difference between the current prediction and the correct label is below this threshold, the model is not updated. 
@@ -329,7 +330,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): function calls. See :term:`Glossary `. - warm_start : bool, optional + warm_start : bool, default=False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary `. @@ -338,7 +339,7 @@ class PassiveAggressiveRegressor(BaseSGDRegressor): result in a different solution than when calling fit a single time because of the way the data is shuffled. - average : bool or int, optional + average : bool or int, default=False When set to True, computes the averaged SGD weights and stores the result in the ``coef_`` attribute. If set to an int greater than 1, averaging will begin once the total number of samples seen reaches diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 133bafcc6cdee..3ce71e4b4b809 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -65,7 +65,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin, Parameters ---------- - base_estimator : object, optional + base_estimator : object, default=None Base estimator object which implements the following methods: * `fit(X, y)`: Fit model to given training data and target values. @@ -83,7 +83,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin, Note that the current implementation only supports regression estimators. - min_samples : int (>= 1) or float ([0, 1]), optional + min_samples : int (>= 1) or float ([0, 1]), default=None Minimum number of samples chosen randomly from original data. Treated as an absolute number of samples for `min_samples >= 1`, treated as a relative number `ceil(min_samples * X.shape[0]`) for @@ -92,17 +92,17 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin, ``sklearn.linear_model.LinearRegression()`` estimator is assumed and `min_samples` is chosen as ``X.shape[1] + 1``. 
- residual_threshold : float, optional + residual_threshold : float, default=None Maximum residual for a data sample to be classified as an inlier. By default the threshold is chosen as the MAD (median absolute deviation) of the target values `y`. - is_data_valid : callable, optional + is_data_valid : callable, default=None This function is called with the randomly selected data before the model is fitted to it: `is_data_valid(X, y)`. If its return value is False the current randomly chosen sub-sample is skipped. - is_model_valid : callable, optional + is_model_valid : callable, default=None This function is called with the estimated model and the randomly selected data: `is_model_valid(model, X, y)`. If its return value is False the current randomly chosen sub-sample is skipped. @@ -110,23 +110,23 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin, with `is_data_valid`. `is_model_valid` should therefore only be used if the estimated model is needed for making the rejection decision. - max_trials : int, optional + max_trials : int, default=100 Maximum number of iterations for random sample selection. - max_skips : int, optional + max_skips : int, default=np.inf Maximum number of iterations that can be skipped due to finding zero inliers or invalid data defined by ``is_data_valid`` or invalid models defined by ``is_model_valid``. .. versionadded:: 0.19 - stop_n_inliers : int, optional + stop_n_inliers : int, default=np.inf Stop iteration if at least this number of inliers are found. - stop_score : float, optional + stop_score : float, default=np.inf Stop iteration if score is greater equal than this threshold. - stop_probability : float in range [0, 1], optional + stop_probability : float in range [0, 1], default=0.99 RANSAC iteration stops if at least one outlier-free set of the training data is sampled in RANSAC. 
This requires to generate at least N samples (iterations):: @@ -137,7 +137,7 @@ class RANSACRegressor(MetaEstimatorMixin, RegressorMixin, as 0.99 (the default) and e is the current fraction of inliers w.r.t. the total number of samples. - loss : string, callable, optional, default "absolute_loss" + loss : string, callable, default='absolute_loss' String inputs, "absolute_loss" and "squared_loss" are supported which find the absolute loss and squared loss per sample respectively. diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index 28d2dba3f8719..722c69cddf8c6 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -209,14 +209,14 @@ class TheilSenRegressor(RegressorMixin, LinearModel): Parameters ---------- - fit_intercept : boolean, optional, default True + fit_intercept : boolean, default=True Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations. - copy_X : boolean, optional, default True + copy_X : boolean, default=True If True, X will be copied; else, it may be overwritten. - max_subpopulation : int, optional, default 1e4 + max_subpopulation : int, default=1e4 Instead of computing with a set of cardinality 'n choose k', where n is the number of samples and k is the number of subsamples (at least number of features), consider only a stochastic subpopulation of a @@ -224,7 +224,7 @@ class TheilSenRegressor(RegressorMixin, LinearModel): For other than small problem sizes this parameter will determine memory usage and runtime if n_subsamples is not changed. - n_subsamples : int, optional, default None + n_subsamples : int, default=None Number of samples to calculate the parameters. This is at least the number of features (plus 1 if fit_intercept=True) and the number of samples as a maximum. 
A lower number leads to a higher breakdown @@ -234,10 +234,10 @@ class TheilSenRegressor(RegressorMixin, LinearModel): If n_subsamples is set to n_samples, Theil-Sen is identical to least squares. - max_iter : int, optional, default 300 + max_iter : int, default=300 Maximum number of iterations for the calculation of spatial median. - tol : float, optional, default 1.e-3 + tol : float, default=1.e-3 Tolerance when calculating spatial median. random_state : int, RandomState instance, default=None @@ -246,13 +246,13 @@ class TheilSenRegressor(RegressorMixin, LinearModel): multiple function calls. See :term:`Glossary ` - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. - verbose : boolean, optional, default False + verbose : boolean, default=False Verbose mode when fitting the model. 
Attributes From ca94a795ce59b8be582bc9c658cb10cac40f2165 Mon Sep 17 00:00:00 2001 From: genvalen Date: Sat, 6 Jun 2020 23:54:12 -0400 Subject: [PATCH 02/42] Update default docstrings for Metrics --- sklearn/metrics/_base.py | 2 +- sklearn/metrics/_classification.py | 76 +++++++++---------- sklearn/metrics/_ranking.py | 34 ++++----- sklearn/metrics/_regression.py | 26 ++++--- sklearn/metrics/_scorer.py | 10 +-- sklearn/metrics/cluster/_bicluster.py | 2 +- sklearn/metrics/cluster/_supervised.py | 17 +++-- sklearn/metrics/cluster/_unsupervised.py | 8 +- sklearn/metrics/pairwise.py | 97 ++++++++++++------------ 9 files changed, 138 insertions(+), 134 deletions(-) diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py index 63c74c0663adb..5f09dc0f0dd27 100644 --- a/sklearn/metrics/_base.py +++ b/sklearn/metrics/_base.py @@ -156,7 +156,7 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score, Target scores corresponding to probability estimates of a sample belonging to a particular class - average : 'macro' or 'weighted', optional (default='macro') + average : 'macro' or 'weighted', default='macro' Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 2ceccca65203e..c3d98ee51a439 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -140,7 +140,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None): y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - normalize : bool, optional (default=True) + normalize : bool, default=True If ``False``, return the number of correctly classified samples. Otherwise, return the fraction of correctly classified samples. 
@@ -370,7 +370,7 @@ def multilabel_confusion_matrix(y_true, y_pred, *, sample_weight=None, sample_weight : array-like of shape (n_samples,), default=None Sample weights - labels : array-like + labels : array-like, default=None A list of classes or column indices to select some (or to force inclusion of classes absent from the data) @@ -568,12 +568,12 @@ class labels [2]_. Labels assigned by the second annotator. The kappa statistic is symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. - labels : array, shape = [n_classes], optional + labels : array, shape = [n_classes], default=None List of labels to index the matrix. This may be used to select a subset of labels. If None, all labels that appear at least once in ``y1`` or ``y2`` are used. - weights : str, optional + weights : str, default=None Weighting type to calculate the score. None means no weighted; "linear" means linear weighted; "quadratic" means quadratic weighted. @@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1, y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -650,14 +650,14 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. 
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default='binary' If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -869,7 +869,7 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None): y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - normalize : bool, optional (default=True) + normalize : bool, default=True If ``False``, return the number of misclassifications. Otherwise, return the fraction of misclassifications. @@ -948,7 +948,7 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -960,14 +960,14 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', .. versionchanged:: 0.17 parameter *labels* improved for multiclass problem. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned.
Otherwise, this determines the type of averaging performed on the data: @@ -1073,7 +1073,7 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1, beta : float Determines the weight of recall in the combined score. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1085,14 +1085,14 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1, .. versionchanged:: 0.17 parameter *labels* improved for multiclass problem. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1312,10 +1312,10 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - beta : float, 1.0 by default + beta : float, default=1.0 The strength of recall versus precision in the F-score. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``.
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1324,14 +1324,14 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default=None If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1527,7 +1527,7 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1, y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1539,14 +1539,14 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1, .. versionchanged:: 0.17 parameter *labels* improved for multiclass problem. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only.
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1645,7 +1645,7 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : list, optional + labels : list, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -1657,14 +1657,14 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', .. versionchanged:: 0.17 parameter *labels* improved for multiclass problem. - pos_label : str or int, 1 by default + pos_label : str or int, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass or multilabel, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \ - 'weighted'] + average : {'binary', 'micro', 'macro', 'samples', 'weighted', None}, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -1838,21 +1838,21 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None, y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier.
- labels : array, shape = [n_labels] + labels : array, shape = [n_labels], default=None Optional list of label indices to include in the report. - target_names : list of strings + target_names : list of strings, default=None Optional display names matching the labels (same order). sample_weight : array-like of shape (n_samples,), default=None Sample weights. - digits : int + digits : int, default=2 Number of digits for formatting output floating point values. When ``output_dict`` is ``True``, this will be ignored and the returned values will not be rounded. - output_dict : bool (default = False) + output_dict : bool, default=False If True, return output as dict .. versionadded:: 0.20 @@ -2145,18 +2145,18 @@ def log_loss(y_true, y_pred, *, eps=1e-15, normalize=True, sample_weight=None, ordered alphabetically, as done by :class:`preprocessing.LabelBinarizer`. - eps : float + eps : float, default=1e-15 Log loss is undefined for p=0 or p=1, so probabilities are clipped to max(eps, min(1 - eps, p)). - normalize : bool, optional (default=True) + normalize : bool, default=True If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses. sample_weight : array-like of shape (n_samples,), default=None Sample weights. - labels : array-like, optional (default=None) + labels : array-like, default=None If not provided, labels will be inferred from y_true. If ``labels`` is ``None`` and ``y_pred`` has shape (n_samples,) the labels are assumed to be binary and are inferred from ``y_true``. @@ -2269,7 +2269,7 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): pred_decision : array, shape = [n_samples] or [n_samples, n_classes] Predicted decisions, as output by decision_function (floats). - labels : array, optional, default None + labels : array, default=None Contains all the labels for the problem. Used in multiclass hinge loss. 
sample_weight : array-like of shape (n_samples,), default=None diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 8085711246e56..93623d950be63 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -135,7 +135,7 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1, class, confidence values, or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). - average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted'] + average : {'micro', 'macro', 'samples', 'weighted', None}, default='macro' If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -153,7 +153,7 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1, Will be ignored when ``y_true`` is binary. - pos_label : int or str (default=1) + pos_label : int or str, default=1 The label of the positive class. Only applied to binary ``y_true``. For multilabel-indicator ``y_true``, ``pos_label`` is fixed to 1. @@ -411,7 +411,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Target scores corresponding to probability estimates of a sample belonging to a particular class - labels : array, shape = [n_classes] or None, optional (default=None) + labels : array, shape = [n_classes] or None List of labels to index ``y_score`` used for multiclass. If ``None``, the lexical order of ``y_true`` is used to index ``y_score``. @@ -424,7 +424,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Calculate metrics for the multiclass case using the one-vs-one approach.
- average : 'macro' or 'weighted', optional (default='macro') + average : 'macro' or 'weighted' Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: @@ -435,7 +435,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Calculate metrics for each label, taking into account the prevalence of the classes. - sample_weight : array-like of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,) Sample weights. """ @@ -1075,15 +1075,15 @@ def _dcg_sample_scores(y_true, y_score, k=None, or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). - k : int, optional (default=None) + k : int, default=None Only consider the highest k scores in the ranking. If None, use all outputs. - log_base : float, optional (default=2) + log_base : float, default=2 Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important). - ignore_ties : bool, optional (default=False) + ignore_ties : bool, default=False Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains. @@ -1199,18 +1199,18 @@ def dcg_score(y_true, y_score, *, k=None, or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). - k : int, optional (default=None) + k : int, default=None Only consider the highest k scores in the ranking. If None, use all outputs. - log_base : float, optional (default=2) + log_base : float, default=2 Base of the logarithm used for the discount. A low value means a sharper discount (top results are more important). - sample_weight : ndarray, shape (n_samples,), optional (default=None) + sample_weight : ndarray, shape (n_samples,), default=None Sample weights. If None, all samples are given the same weight. 
- ignore_ties : bool, optional (default=False) + ignore_ties : bool, default=False Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains. @@ -1303,11 +1303,11 @@ def _ndcg_sample_scores(y_true, y_score, k=None, ignore_ties=False): or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). - k : int, optional (default=None) + k : int, default=None Only consider the highest k scores in the ranking. If None, use all outputs. - ignore_ties : bool, optional (default=False) + ignore_ties : bool, default=False Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains. @@ -1356,14 +1356,14 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). - k : int, optional (default=None) + k : int, default=None Only consider the highest k scores in the ranking. If None, use all outputs. - sample_weight : ndarray, shape (n_samples,), optional (default=None) + sample_weight : ndarray, shape (n_samples,), default=None Sample weights. If None, all samples are given the same weight. - ignore_ties : bool, optional (default=False) + ignore_ties : bool, default=False Assume that there are no ties in y_score (which is likely to be the case if y_score is continuous) for efficiency gains. diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index e805bdc099d1f..0168f49fdf476 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -136,11 +136,11 @@ def mean_absolute_error(y_true, y_pred, *, y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. - sample_weight : array-like of shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights.
multioutput : string in ['raw_values', 'uniform_average'] \ - or array-like of shape (n_outputs) + or array-like of shape (n_outputs), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. @@ -208,11 +208,11 @@ def mean_squared_error(y_true, y_pred, *, y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. - sample_weight : array-like of shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : string in ['raw_values', 'uniform_average'] \ - or array-like of shape (n_outputs) + or array-like of shape (n_outputs), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. @@ -222,7 +222,7 @@ def mean_squared_error(y_true, y_pred, *, 'uniform_average' : Errors of all outputs are averaged with uniform weight. - squared : boolean value, optional (default = True) + squared : boolean value, default=True If True returns MSE value, if False returns RMSE value. Returns @@ -289,11 +289,11 @@ def mean_squared_log_error(y_true, y_pred, *, y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. - sample_weight : array-like of shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : string in ['raw_values', 'uniform_average'] \ - or array-like of shape (n_outputs) + or array-like of shape (n_outputs), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. @@ -358,7 +358,7 @@ def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average', Estimated target values. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ - (n_outputs,) + (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. 
Array-like value defines weights used to average errors. @@ -434,11 +434,12 @@ def explained_variance_score(y_true, y_pred, *, y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. - sample_weight : array-like of shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ - 'variance_weighted'] or array-like of shape (n_outputs) + 'variance_weighted'] or array-like of shape (n_outputs), \ + default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. @@ -529,11 +530,12 @@ def r2_score(y_true, y_pred, *, sample_weight=None, y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. - sample_weight : array-like of shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ -'variance_weighted'] or None or array-like of shape (n_outputs) + 'variance_weighted'] or None or array-like of shape (n_outputs), \ + default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index f116f76edb260..2508529b1fc39 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -150,7 +150,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. - sample_weight : array-like, optional (default=None) + sample_weight : array-like, default=None Sample weights. Returns @@ -186,7 +186,7 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. 
- sample_weight : array-like, optional (default=None) + sample_weight : array-like, default=None Sample weights. Returns @@ -371,12 +371,12 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): estimator : estimator object implementing 'fit' The object to use to fit the data. - scoring : string, callable or None, optional, default: None + scoring : string, callable or None, default=None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. - allow_none : boolean, optional, default: False + allow_none : boolean, default=False If no scoring is specified and the estimator has no score function, we can either return None or raise an exception. @@ -431,7 +431,7 @@ def _check_multimetric_scoring(estimator, scoring=None): estimator : sklearn estimator instance The estimator for which the scoring will be applied. - scoring : string, callable, list/tuple, dict or None, default: None + scoring : string, callable, list/tuple, dict or None, default=None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index ac0d0a454a74a..8f967cee6264e 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -64,7 +64,7 @@ def consensus_score(a, b, *, similarity="jaccard"): b : (rows, columns) Another set of biclusters like ``a``. - similarity : string or function, optional, default: "jaccard" + similarity : string or function, default="jaccard" May be the string "jaccard" to use the Jaccard coefficient, or any function that takes four arguments, each of which is a 1d indicator vector: (a_rows, a_columns, b_rows, b_columns).
diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index d652737bd23c0..b561d0e5015a4 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -90,12 +90,12 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False): labels_pred : array-like of shape (n_samples,) Cluster labels to evaluate - eps : None or float, optional. + eps : None or float, default=None If a float, that value is added to all values in the contingency matrix. This helps to stop NaN propagation. If ``None``, nothing is adjusted. - sparse : boolean, optional. + sparse : boolean, default=False If True, return a sparse CSR continency matrix. If ``eps is not None``, and ``sparse is True``, will throw ValueError. @@ -280,7 +280,7 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0): labels_pred : array-like of shape (n_samples,) cluster labels to evaluate - beta : float + beta : float, default=1.0 Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, @@ -498,7 +498,7 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0): labels_pred : array-like of shape (n_samples,) cluster labels to evaluate - beta : float + beta : float, default=1.0 Ratio of weight attributed to ``homogeneity`` vs ``completeness``. If ``beta`` is greater than 1, ``completeness`` is weighted more strongly in the calculation. If ``beta`` is less than 1, @@ -602,7 +602,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): A clustering of the data into disjoint subsets. contingency : {None, array, sparse matrix}, \ - shape = [n_classes_true, n_classes_pred] + shape = [n_classes_true, n_classes_pred], \ + default=None A contingency matrix given by the :func:`contingency_matrix` function. 
If value is ``None``, it will be computed, otherwise the given value is used, with ``labels_true`` and ``labels_pred`` ignored. @@ -689,7 +690,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, *, labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. - average_method : string, optional (default: 'arithmetic') + average_method : string, default='arithmetic' How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. @@ -809,7 +810,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, *, labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. - average_method : string, optional (default: 'arithmetic') + average_method : string, default='arithmetic' How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. @@ -909,7 +910,7 @@ def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False): labels_pred : array, shape = (``n_samples``, ) A clustering of the data into disjoint subsets. - sparse : bool + sparse : bool, default=False Compute contingency matrix internally with sparse matrix. Returns diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index ce5563c4763d3..13ed169e69169 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -66,19 +66,19 @@ def silhouette_score(X, labels, *, metric='euclidean', sample_size=None, labels : array, shape = [n_samples] Predicted labels for each sample. - metric : string, or callable + metric : string, or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`metrics.pairwise.pairwise_distances `. If X is the distance array itself, use ``metric="precomputed"``. 
- sample_size : int or None + sample_size : int or None, default=None The size of the sample to use when computing the Silhouette Coefficient on a random subset of the data. If ``sample_size is None``, no sampling is used. - random_state : int, RandomState instance or None, optional (default=None) + random_state : int, RandomState instance or None, default=None Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. @@ -182,7 +182,7 @@ def silhouette_samples(X, labels, *, metric='euclidean', **kwds): labels : array, shape = [n_samples] label values for each sample - metric : string, or callable + metric : string, or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 2424c84394e2b..3406e0c316459 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -82,24 +82,24 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None, Y : {array-like, sparse matrix}, shape (n_samples_b, n_features) - precomputed : bool + precomputed : bool, default=False True if X is to be treated as precomputed distances to the samples in Y. - dtype : string, type, list of types or None (default=None) + dtype : string, type, list of types or None, default=None Data type required for X and Y. If None, the dtype will be an appropriate float type selected by _return_float_dtype. .. versionadded:: 0.18 - accept_sparse : string, boolean or list/tuple of strings + accept_sparse : string, boolean or list/tuple of strings, default='csr' String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. 
If the input is sparse but not in the allowed format, it will be converted to the first listed format. True allows the input to be any format. False means that a sparse matrix input will raise an error. - force_all_finite : boolean or 'allow-nan', (default=True) + force_all_finite : boolean or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: @@ -114,7 +114,7 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None, .. versionchanged:: 0.23 Accepts `pd.NA` and converts it into `np.nan` - copy : bool + copy : bool, default=False Whether a forced copy will be triggered. If copy=False, a copy might be triggered by a conversion. @@ -226,15 +226,15 @@ def euclidean_distances(X, Y=None, *, Y_norm_squared=None, squared=False, Y : {array-like, sparse matrix}, shape (n_samples_2, n_features) - Y_norm_squared : array-like, shape (n_samples_2, ), optional + Y_norm_squared : array-like, shape (n_samples_2, ), default=None Pre-computed dot-products of vectors in Y (e.g., ``(Y**2).sum(axis=1)``) May be ignored in some cases, see the note below. - squared : boolean, optional + squared : boolean, default=False Return squared Euclidean distances. - X_norm_squared : array-like of shape (n_samples,), optional + X_norm_squared : array-like of shape (n_samples,), default=None Pre-computed dot-products of vectors in X (e.g., ``(X**2).sum(axis=1)``) May be ignored in some cases, see the note below. @@ -534,10 +534,10 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean", Y : {array-like, sparse matrix}, shape (n_samples2, n_features) Arrays containing points. - axis : int, optional, default 1 + axis : int, optional, default=1 Axis along which the argmin and distances are to be computed. - metric : string or callable, default 'euclidean' + metric : string or callable, default='euclidean' metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
@@ -563,7 +563,7 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean", See the documentation for scipy.spatial.distance for details on these metrics. - metric_kwargs : dict, optional + metric_kwargs : dict, default=None Keyword arguments to pass to specified metric function. Returns @@ -623,10 +623,10 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean", Arrays containing points. Respective shapes (n_samples1, n_features) and (n_samples2, n_features) - axis : int, optional, default 1 + axis : int, optional, default=1 Axis along which the argmin and distances are to be computed. - metric : string or callable + metric : string or callable, default="euclidean" metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. @@ -652,7 +652,7 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean", See the documentation for scipy.spatial.distance for details on these metrics. - metric_kwargs : dict + metric_kwargs : dict, default=None keyword arguments to pass to specified metric function. Returns @@ -688,7 +688,7 @@ def haversine_distances(X, Y=None): ---------- X : array_like, shape (n_samples_1, 2) - Y : array_like, shape (n_samples_2, 2), optional + Y : array_like, shape (n_samples_2, 2), default=None Returns ------- @@ -814,8 +814,8 @@ def cosine_distances(X, Y=None): X : array_like, sparse matrix with shape (n_samples_X, n_features). - Y : array_like, sparse matrix (optional) - with shape (n_samples_Y, n_features). + Y : array_like, sparse matrix with shape + (n_samples_Y, n_features), default=None. Returns ------- @@ -935,7 +935,7 @@ def paired_distances(X, Y, *, metric="euclidean", **kwds): Y : ndarray (n_samples, n_features) Array 2 for distance computation. - metric : string or callable + metric : string or callable, default="euclidean" The metric to use when calculating distance between instances in a feature array. 
If metric is a string, it must be one of the options specified in PAIRED_DISTANCES, including "euclidean", @@ -987,9 +987,9 @@ def linear_kernel(X, Y=None, dense_output=True): ---------- X : array of shape (n_samples_1, n_features) - Y : array of shape (n_samples_2, n_features) + Y : array of shape (n_samples_2, n_features), default=None - dense_output : boolean (optional), default True + dense_output : boolean (optional), default=True Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. @@ -1015,14 +1015,14 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1): ---------- X : ndarray of shape (n_samples_1, n_features) - Y : ndarray of shape (n_samples_2, n_features) + Y : ndarray of shape (n_samples_2, n_features), default=None - degree : int, default 3 + degree : int, default=3 - gamma : float, default None + gamma : float, default=None if None, defaults to 1.0 / n_features - coef0 : float, default 1 + coef0 : float, default=1 Returns ------- @@ -1051,12 +1051,12 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1): ---------- X : ndarray of shape (n_samples_1, n_features) - Y : ndarray of shape (n_samples_2, n_features) + Y : ndarray of shape (n_samples_2, n_features), default=None - gamma : float, default None + gamma : float, default=None If None, defaults to 1.0 / n_features - coef0 : float, default 1 + coef0 : float, default=1 Returns ------- @@ -1087,9 +1087,9 @@ def rbf_kernel(X, Y=None, gamma=None): ---------- X : array of shape (n_samples_X, n_features) - Y : array of shape (n_samples_Y, n_features) + Y : array of shape (n_samples_Y, n_features), default=None - gamma : float, default None + gamma : float, default=None If None, defaults to 1.0 / n_features Returns @@ -1122,9 +1122,9 @@ def laplacian_kernel(X, Y=None, gamma=None): ---------- X : array of shape (n_samples_X, n_features) - Y : array of shape (n_samples_Y, n_features) + Y : array of shape (n_samples_Y, 
n_features), default=None - gamma : float, default None + gamma : float, default=None If None, defaults to 1.0 / n_features Returns @@ -1157,11 +1157,12 @@ def cosine_similarity(X, Y=None, dense_output=True): X : ndarray or sparse array, shape: (n_samples_X, n_features) Input data. - Y : ndarray or sparse array, shape: (n_samples_Y, n_features) + Y : ndarray or sparse array, shape: (n_samples_Y, n_features), \ + default=None Input data. If ``None``, the output will be the pairwise similarities between all samples in ``X``. - dense_output : boolean (optional), default True + dense_output : boolean (optional), default=True Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. @@ -1214,7 +1215,7 @@ def additive_chi2_kernel(X, Y=None): ---------- X : array-like of shape (n_samples_X, n_features) - Y : array of shape (n_samples_Y, n_features) + Y : array of shape (n_samples_Y, n_features), default=None Returns ------- @@ -1269,7 +1270,7 @@ def chi2_kernel(X, Y=None, gamma=1.): ---------- X : array-like of shape (n_samples_X, n_features) - Y : array of shape (n_samples_Y, n_features) + Y : array of shape (n_samples_Y, n_features), default=None gamma : float, default=1. Scaling parameter of the chi2 kernel. @@ -1479,11 +1480,11 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, [n_samples_a, n_features] otherwise Array of pairwise distances between samples, or a feature array. - Y : array [n_samples_b, n_features], optional + Y : array [n_samples_b, n_features], default=None An optional second feature array. Only allowed if metric != "precomputed". - reduce_func : callable, optional + reduce_func : callable, default=None The function which is applied on each chunk of the distance matrix, reducing it to needed values. 
``reduce_func(D_chunk, start)`` is called repeatedly, where ``D_chunk`` is a contiguous vertical @@ -1495,7 +1496,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, If None, pairwise_distances_chunked returns a generator of vertical chunks of the distance matrix. - metric : string, or callable + metric : string, or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or @@ -1506,7 +1507,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, should take two arrays from X as input and return a value indicating the distance between them. - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. @@ -1515,7 +1516,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, ``-1`` means using all processors. See :term:`Glossary ` for more details. - working_memory : int, optional + working_memory : int, default=None The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used. @@ -1669,11 +1670,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, [n_samples_a, n_features] otherwise Array of pairwise distances between samples, or a feature array. - Y : array [n_samples_b, n_features], optional + Y : array [n_samples_b, n_features], default=None An optional second feature array. Only allowed if metric != "precomputed". - metric : string, or callable + metric : string, or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. 
If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or @@ -1684,7 +1685,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, should take two arrays from X as input and return a value indicating the distance between them. - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. @@ -1693,7 +1694,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, ``-1`` means using all processors. See :term:`Glossary ` for more details. - force_all_finite : boolean or 'allow-nan', (default=True) + force_all_finite : boolean or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: @@ -1871,10 +1872,10 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, [n_samples_a, n_features] otherwise Array of pairwise kernels between samples, or a feature array. - Y : array [n_samples_b, n_features] + Y : array [n_samples_b, n_features], default=None A second feature array only if X has shape [n_samples_a, n_features]. - metric : string, or callable + metric : string, or callable, default="linear" The metric to use when calculating kernel between instances in a feature array. If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. @@ -1887,10 +1888,10 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, matrices, not single samples. Use the string identifying the kernel instead. - filter_params : boolean + filter_params : boolean, default=False Whether to filter invalid parameters or not. - n_jobs : int or None, optional (default=None) + n_jobs : int or None, default=None The number of jobs to use for the computation. 
This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. From cc09f9d914753339c833a2da4b111f70deb6ee35 Mon Sep 17 00:00:00 2001 From: genvalen Date: Sun, 7 Jun 2020 00:00:47 -0400 Subject: [PATCH 03/42] Fix lint error --- sklearn/metrics/_regression.py | 4 ++-- sklearn/metrics/pairwise.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 0168f49fdf476..6c225fc39f918 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -534,8 +534,8 @@ def r2_score(y_true, y_pred, *, sample_weight=None, Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ - 'variance_weighted'] or None or array-like of shape (n_outputs), \ - default='uniform_average' + 'variance_weighted'] or None or array-like of \ + shape (n_outputs), default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 3406e0c316459..8b3a25c4f9b60 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -814,7 +814,7 @@ def cosine_distances(X, Y=None): X : array_like, sparse matrix with shape (n_samples_X, n_features). - Y : array_like, sparse matrix with shape + Y : array_like, sparse matrix with shape (n_samples_Y, n_features), default=None. 
Returns From c8cd4249098a5a4ce66751ffeb31ecb9f39d2e96 Mon Sep 17 00:00:00 2001 From: genvalen Date: Sun, 7 Jun 2020 00:25:48 -0400 Subject: [PATCH 04/42] Resolve merge conflicts --- sklearn/metrics/pairwise.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index b1c72b9df494d..8506134227045 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -688,11 +688,7 @@ def haversine_distances(X, Y=None): ---------- X : array-like, shape (n_samples_1, 2) -<<<<<<< HEAD Y : array_like, shape (n_samples_2, 2), default=None -======= - Y : array-like, shape (n_samples_2, 2), optional ->>>>>>> upstream/master Returns ------- From da935acbda6e6474f13ee788eb3e5ef11e388d0c Mon Sep 17 00:00:00 2001 From: genvalen Date: Sun, 7 Jun 2020 00:33:32 -0400 Subject: [PATCH 05/42] Fix flake8 error --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 8506134227045..5d72dd5006d72 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -811,7 +811,7 @@ def cosine_distances(X, Y=None): Parameters ---------- - X : array-like, sparse matrix + X : array-like, sparse matrix with shape (n_samples_X, n_features). 
Y : array_like, sparse matrix with shape \ From bede6d0c9d3ba3ed920140f7838f7dfd51f5c9b0 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:24:00 -0400 Subject: [PATCH 06/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index c3d98ee51a439..1fd32ee92e34d 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1330,8 +1330,8 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, ['binary', 'micro', 'macro', 'samples','weighted', \ - default=None] + average : {'binary', 'micro', 'macro', 'samples','weighted'}, \ + default=None If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 7b4647e083262dafd956e19e76c81290672816ed Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:24:15 -0400 Subject: [PATCH 07/42] Update sklearn/metrics/_classification.py Co-authored-by: Adrin Jalali --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1fd32ee92e34d..3690d29d439be 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -568,7 +568,7 @@ class labels [2]_. Labels assigned by the second annotator. The kappa statistic is symmetric, so swapping ``y1`` and ``y2`` doesn't change the value. - labels : array, shape = [n_classes], default=None + labels : array-like of shape (n_classes,), default=None List of labels to index the matrix. This may be used to select a subset of labels. 
If None, all labels that appear at least once in ``y1`` or ``y2`` are used. From cfe8b2b8ccc6f58199fb7f7923e484988d772b78 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:24:27 -0400 Subject: [PATCH 08/42] Update sklearn/metrics/_base.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py index 5f09dc0f0dd27..21d0ab38f6a91 100644 --- a/sklearn/metrics/_base.py +++ b/sklearn/metrics/_base.py @@ -156,7 +156,7 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score, Target scores corresponding to probability estimates of a sample belonging to a particular class - average : 'macro' or 'weighted', default='macro' + average : {'macro', 'weighted'}, default='macro' Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: From 1ec16e835b745f2e7961c58221b2a69503e88bad Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:24:48 -0400 Subject: [PATCH 09/42] Update sklearn/metrics/_classification.py Co-authored-by: Adrin Jalali --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 3690d29d439be..84326f3577262 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -656,8 +656,8 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1, setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'micro', 'macro', 'samples', 'weighted', \ - default = 'binary'] + average : {None, 'micro', 'macro', 'samples', 'weighted', \ + 'binary'}, default='binary' If ``None``, the scores for each class are returned. 
Otherwise, this determines the type of averaging performed on the data: From 5e52205c587f0d33bfcb50912afbbfce95159635 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:26:43 -0400 Subject: [PATCH 10/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 84326f3577262..9d993491e3ab7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -573,7 +573,7 @@ class labels [2]_. subset of labels. If None, all labels that appear at least once in ``y1`` or ``y2`` are used. - weights : str, default=None + weights : {'linear', 'quadratic'}, default=None Weighting type to calculate the score. None means no weighted; "linear" means linear weighted; "quadratic" means quadratic weighted. From 8df195e2a803cc6990cd849aaf5e571000fc251e Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:26:53 -0400 Subject: [PATCH 11/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 9d993491e3ab7..7a1191a7183ce 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1, y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``.
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From c93effdb7e147a8af0b8d88a402639f6807ef66f Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:27:03 -0400 Subject: [PATCH 12/42] Update sklearn/metrics/pairwise.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 5d72dd5006d72..f069ec35e72a3 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1695,7 +1695,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, ``-1`` means using all processors. See :term:`Glossary ` for more details. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: From 57ca202167ef44c127f41896c69a0b64679c2fb1 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:27:15 -0400 Subject: [PATCH 13/42] Update sklearn/metrics/pairwise.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index f069ec35e72a3..3a85781568a0e 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1876,7 +1876,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, Y : array [n_samples_b, n_features], default=None A second feature array only if X has shape [n_samples_a, n_features]. - metric : string, or callable, default="linear" + metric : str or callable, default="linear" The metric to use when calculating kernel between instances in a feature array. If metric is a string, it must be one of the metrics in pairwise.PAIRWISE_KERNEL_FUNCTIONS. 
From 393759291a50ec2f21888d72b39d5b27893b309a Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:27:32 -0400 Subject: [PATCH 14/42] Update sklearn/metrics/pairwise.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 3a85781568a0e..64585723dfd16 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1892,7 +1892,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, filter_params : boolean, default=False Whether to filter invalid parameters or not. - n_jobs : int or None, default=None + n_jobs : int, default=None The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. From a069d00329df1821814f010ba1c02e8899f14f1e Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:27:46 -0400 Subject: [PATCH 15/42] Update sklearn/metrics/pairwise.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 64585723dfd16..a96e960c66835 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1889,7 +1889,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, matrices, not single samples. Use the string identifying the kernel instead. - filter_params : boolean, default=False + filter_params : bool, default=False Whether to filter invalid parameters or not. 
n_jobs : int, default=None From a82ee380a2d448076d907ff3b21c6efacdb7f525 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:28:34 -0400 Subject: [PATCH 16/42] Update sklearn/metrics/pairwise.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index a96e960c66835..f6a9e74c1da51 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -1873,7 +1873,7 @@ def pairwise_kernels(X, Y=None, metric="linear", *, filter_params=False, [n_samples_a, n_features] otherwise Array of pairwise kernels between samples, or a feature array. - Y : array [n_samples_b, n_features], default=None + Y : array of shape (n_samples_b, n_features), default=None A second feature array only if X has shape [n_samples_a, n_features]. metric : str or callable, default="linear" From 572ca85ee3337652f7bfe87e5829d079dcf735d1 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:29:39 -0400 Subject: [PATCH 17/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 7a1191a7183ce..41efd31d7a615 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -948,7 +948,7 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. 
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From 89af3719450d986cc9f23a2f002d28906807cdce Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:29:53 -0400 Subject: [PATCH 18/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 41efd31d7a615..9a3763fb57ef7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -966,8 +966,8 @@ def f1_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'micro', 'macro', 'samples','weighted', \ - default='binary'] + average : {'micro', 'macro', 'samples','weighted', 'binary'} or None, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 601c75723d1037824a836473e7eb283d5efad455 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:35:56 -0400 Subject: [PATCH 19/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 9a3763fb57ef7..70b0994ae40c3 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1073,7 +1073,7 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1, beta : float Determines the weight of recall in the combined score. 
- labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From 4ed383019ef8638eb73d5579f81abeaca12258b2 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:39:37 -0400 Subject: [PATCH 20/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 70b0994ae40c3..b3da0477dfb02 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1091,8 +1091,8 @@ def fbeta_score(y_true, y_pred, *, beta, labels=None, pos_label=1, setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'micro', 'macro', 'samples', 'weighted', \ - default='binary'] + average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned.
Otherwise, this determines the type of averaging performed on the data: From 1fa4425779ea26b0bb3b7310557560aea1a06da1 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 16:56:38 -0400 Subject: [PATCH 21/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b3da0477dfb02..71cf73b3c872c 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1315,7 +1315,7 @@ def precision_recall_fscore_support(y_true, y_pred, *, beta=1.0, labels=None, beta : float, default=1.0 The strength of recall versus precision in the F-score. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From 48a5908594a2c73ceeb35c089b38fbc36ebf441d Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 17:02:15 -0400 Subject: [PATCH 22/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 71cf73b3c872c..1c8e7be68e42a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1545,8 +1545,8 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1, setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'micro', 'macro', 'samples', 'weighted', \ - default='binary'] + average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \ + default='binary' This parameter is required for multiclass/multilabel targets.
If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 881f73387c524ae07b9a77374feba4f149df4f1d Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 17:02:31 -0400 Subject: [PATCH 23/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1c8e7be68e42a..06d9bca37c25a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1527,7 +1527,7 @@ def precision_score(y_true, y_pred, *, labels=None, pos_label=1, y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From c85c836279e23d7f122035636648b45d8a90bce1 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 17:41:44 -0400 Subject: [PATCH 24/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 06d9bca37c25a..ccb263787c60f 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1645,7 +1645,7 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. 
- labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From 9286fcb7ab6980a88b935040a2f30295d7c15df2 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 17:41:58 -0400 Subject: [PATCH 25/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index ccb263787c60f..20050374910f9 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1663,8 +1663,8 @@ def recall_score(y_true, y_pred, *, labels=None, pos_label=1, average='binary', setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. - average : string, [None, 'micro', 'macro', 'samples', 'weighted', \ - default='binary'] + average : {'micro', 'macro', 'samples', 'weighted', 'binary'} or None, \ + default='binary' This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 8367323a63c57046a6277db087170521ca14cce5 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 18:50:20 -0400 Subject: [PATCH 26/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index aef839fafeae2..3b711709bb359 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1207,7 +1207,7 @@ def dcg_score(y_true, y_score, *, k=None, Base of the logarithm used for the discount.
A low value means a sharper discount (top results are more important). - sample_weight : ndarray, shape (n_samples,), default=None + sample_weight : ndarray of shape (n_samples,), default=None Sample weights. If None, all samples are given the same weight. ignore_ties : bool, default=False From 056c8e9265977284f5c574ccd33b877dc7f32d24 Mon Sep 17 00:00:00 2001 From: genvalen Date: Tue, 9 Jun 2020 23:43:07 -0400 Subject: [PATCH 27/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 20050374910f9..5acfdb245095a 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1838,7 +1838,7 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None, y_pred : 1d array-like, or label indicator array / sparse matrix Estimated targets as returned by a classifier. - labels : array, shape = [n_labels], default=None + labels : array-like of shape (n_labels,), default=None Optional list of label indices to include in the report. target_names : list of strings, default=None From 036b8b69f6766fbafa816cce5f90dd51af091cac Mon Sep 17 00:00:00 2001 From: genvalen Date: Wed, 10 Jun 2020 17:25:39 -0400 Subject: [PATCH 28/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 5acfdb245095a..f7eed56d81ab7 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -2269,7 +2269,7 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): pred_decision : array, shape = [n_samples] or [n_samples, n_classes] Predicted decisions, as output by decision_function (floats). 
- labels : array, default=None + labels : array-like, default=None Contains all the labels for the problem. Used in multiclass hinge loss. sample_weight : array-like of shape (n_samples,), default=None From 1a83796199710e285be97db351a50d025052fd3f Mon Sep 17 00:00:00 2001 From: genvalen Date: Wed, 10 Jun 2020 17:25:53 -0400 Subject: [PATCH 29/42] Update sklearn/metrics/_classification.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index f7eed56d81ab7..d6070b38dfec5 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -1841,7 +1841,7 @@ def classification_report(y_true, y_pred, *, labels=None, target_names=None, labels : array-like of shape (n_labels,), default=None Optional list of label indices to include in the report. - target_names : list of strings, default=None + target_names : list of str of shape (n_labels,), default=None Optional display names matching the labels (same order). sample_weight : array-like of shape (n_samples,), default=None From a3d0e1a6a81da8fe68fb95def81cdaa69a251dd2 Mon Sep 17 00:00:00 2001 From: genvalen Date: Wed, 10 Jun 2020 17:57:41 -0400 Subject: [PATCH 30/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 3b711709bb359..869cdc132cc6d 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -135,7 +135,8 @@ def average_precision_score(y_true, y_score, *, average="macro", pos_label=1, class, confidence values, or non-thresholded measure of decisions (as returned by "decision_function" on some classifiers). 
- average : string, [None, 'micro', 'samples', 'weighted', default="macro"] + average : {'micro', 'samples', 'weighted', 'macro'} or None, \ + default='macro' If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: From 8573644c89230aa7312985d2d4da6bcefe3f4ba1 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 00:01:58 -0400 Subject: [PATCH 31/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 869cdc132cc6d..69769cbadbc7d 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -412,7 +412,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Target scores corresponding to probability estimates of a sample belonging to a particular class - labels : array, shape = [n_classes] or None + labels : array-like of shape (n_classes,), default=None List of labels to index ``y_score`` used for multiclass. If ``None``, the lexical order of ``y_true`` is used to index ``y_score``. From c291010886c8704b2b41ae36e8341caad4534e37 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 00:02:32 -0400 Subject: [PATCH 32/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 69769cbadbc7d..9156df1f8fc9b 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1361,7 +1361,7 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, Only consider the highest k scores in the ranking. If None, use all outputs. - sample_weight : ndarray, shape (n_samples,),default=None + sample_weight : ndarray of shape (n_samples,), default=None Sample weights.
If None, all samples are given the same weight. ignore_ties : bool, default=False From 40dc8b008c3d222047558b812cda501f94900a0d Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 20:52:11 -0400 Subject: [PATCH 33/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 9156df1f8fc9b..3d7628d69596a 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -425,7 +425,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Calculate metrics for the multiclass case using the one-vs-one approach. - average : 'macro' or 'weighted' + average : {'macro', 'weighted'}, default='macro' Determines the type of averaging performed on the pairwise binary metric scores ``'macro'``: From 769e6fb44f62cb4eaf7fdaadf1e32097e5c01ba0 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 20:52:31 -0400 Subject: [PATCH 34/42] Update sklearn/metrics/_ranking.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 3d7628d69596a..7e2c9dfd552b0 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -436,7 +436,7 @@ def _multiclass_roc_auc_score(y_true, y_score, labels, Calculate metrics for each label, taking into account the prevalence of the classes. - sample_weight : array-like of shape (n_samples,) + sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
""" From 0848721eafe2ab8f84e799644a86850a3278ed32 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 20:57:07 -0400 Subject: [PATCH 35/42] Update sklearn/metrics/_regression.py Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 6c225fc39f918..e87106eb9320f 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -139,7 +139,7 @@ def mean_absolute_error(y_true, y_pred, *, sample_weight : array-like of shape (n_samples,), default=None Sample weights. - multioutput : string in ['raw_values', 'uniform_average'] \ + multioutput : {'raw_values', 'uniform_average'} \ or array-like of shape (n_outputs), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. From 37963a2a1ad56fb972f24516b417557fa6b2205d Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 21:01:30 -0400 Subject: [PATCH 36/42] Apply suggestions from code review Co-authored-by: Guillaume Lemaitre --- sklearn/metrics/_regression.py | 4 +-- sklearn/metrics/_scorer.py | 10 +++--- sklearn/metrics/cluster/_bicluster.py | 2 +- sklearn/metrics/cluster/_supervised.py | 12 +++---- sklearn/metrics/cluster/_unsupervised.py | 8 ++--- sklearn/metrics/pairwise.py | 42 ++++++++++++------------ 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index e87106eb9320f..685297af80cb7 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -292,7 +292,7 @@ def mean_squared_log_error(y_true, y_pred, *, sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
- multioutput : string in ['raw_values', 'uniform_average'] \ + multioutput : {'raw_values', 'uniform_average'} \ or array-like of shape (n_outputs), default='uniform_average' Defines aggregating of multiple output values. @@ -533,7 +533,7 @@ def r2_score(y_true, y_pred, *, sample_weight=None, sample_weight : array-like of shape (n_samples,), default=None Sample weights. - multioutput : string in ['raw_values', 'uniform_average', \ + multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, None or \ 'variance_weighted'] or None or array-like of \ shape (n_outputs), default='uniform_average' diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index 2508529b1fc39..bed3fb496f72a 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -150,7 +150,7 @@ def __call__(self, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. - sample_weight : array-like, default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -186,7 +186,7 @@ def _score(self, method_caller, estimator, X, y_true, sample_weight=None): y_true : array-like Gold standard target values for X. - sample_weight : array-like, default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -371,12 +371,12 @@ def check_scoring(estimator, scoring=None, *, allow_none=False): estimator : estimator object implementing 'fit' The object to use to fit the data. - scoring : string, callable or None, optional, default=None + scoring : str or callable, default=None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. - allow_none : boolean, default=False + allow_none : bool, default=False If no scoring is specified and the estimator has no score function, we can either return None or raise an exception.
@@ -431,7 +431,7 @@ def _check_multimetric_scoring(estimator, scoring=None): estimator : sklearn estimator instance The estimator for which the scoring will be applied. - scoring : string, callable, list/tuple, dict or None, default=None + scoring : str, callable, list, tuple or dict, default=None A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index 8f967cee6264e..e267b44cee229 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -64,7 +64,7 @@ def consensus_score(a, b, *, similarity="jaccard"): b : (rows, columns) Another set of biclusters like ``a``. - similarity : string or function, default=jaccard + similarity : 'jaccard' or callable, default='jaccard' May be the string "jaccard" to use the Jaccard coefficient, or any function that takes four arguments, each of which is a 1d indicator vector: (a_rows, a_columns, b_rows, b_columns). diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index b561d0e5015a4..7f84001f98dec 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -90,12 +90,12 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False): labels_pred : array-like of shape (n_samples,) Cluster labels to evaluate - eps : None or float, default=None + eps : float, default=None If a float, that value is added to all values in the contingency matrix. This helps to stop NaN propagation. If ``None``, nothing is adjusted. - sparse : boolean, default=False + sparse : bool, default=False If True, return a sparse CSR continency matrix. If ``eps is not None``, and ``sparse is True``, will throw ValueError. 
@@ -601,8 +601,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. - contingency : {None, array, sparse matrix}, \ - shape = [n_classes_true, n_classes_pred], \ + contingency : {ndarray, sparse matrix} of \ + shape (n_classes_true, n_classes_pred), \ default=None A contingency matrix given by the :func:`contingency_matrix` function. If value is ``None``, it will be computed, otherwise the given value is @@ -690,7 +690,7 @@ def adjusted_mutual_info_score(labels_true, labels_pred, *, labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. - average_method : string, default='arithmetic' + average_method : str, default='arithmetic' How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. @@ -810,7 +810,7 @@ def normalized_mutual_info_score(labels_true, labels_pred, *, labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. - average_method : string, default='arithmetic' + average_method : str, default='arithmetic' How to compute the normalizer in the denominator. Possible options are 'min', 'geometric', 'arithmetic', and 'max'. diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index 13ed169e69169..d0662967816b9 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -66,19 +66,19 @@ def silhouette_score(X, labels, *, metric='euclidean', sample_size=None, labels : array, shape = [n_samples] Predicted labels for each sample. - metric : string, or callable, default='euclidean' + metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`metrics.pairwise.pairwise_distances `. 
If X is the distance array itself, use ``metric="precomputed"``. - sample_size : int or None, default=None + sample_size : int, default=None The size of the sample to use when computing the Silhouette Coefficient on a random subset of the data. If ``sample_size is None``, no sampling is used. - random_state : int, RandomState instance or None, default=None + random_state : int or RandomState instance, default=None Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. @@ -182,7 +182,7 @@ def silhouette_samples(X, labels, *, metric='euclidean', **kwds): labels : array, shape = [n_samples] label values for each sample - metric : string, or callable, default='euclidean' + metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index f6a9e74c1da51..8adf46f5564d6 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -86,20 +86,20 @@ def check_pairwise_arrays(X, Y, *, precomputed=False, dtype=None, True if X is to be treated as precomputed distances to the samples in Y. - dtype : string, type, list of types or None, default=None + dtype : str, type, list of types, default=None Data type required for X and Y. If None, the dtype will be an appropriate float type selected by _return_float_dtype. .. versionadded:: 0.18 - accept_sparse : string, boolean or list/tuple of strings, default='csr' + accept_sparse : str, bool or list/tuple of strings, default='csr' String[s] representing allowed sparse matrix formats, such as 'csc', 'csr', etc. If the input is sparse but not in the allowed format, it will be converted to the first listed format. 
True allows the input to be any format. False means that a sparse matrix input will raise an error. - force_all_finite : boolean or 'allow-nan', default=True + force_all_finite : bool or 'allow-nan', default=True Whether to raise an error on np.inf, np.nan, pd.NA in array. The possibilities are: @@ -226,12 +226,12 @@ def euclidean_distances(X, Y=None, *, Y_norm_squared=None, squared=False, Y : {array-like, sparse matrix}, shape (n_samples_2, n_features) - Y_norm_squared : array-like, shape (n_samples_2, ), default=None + Y_norm_squared : array-like of shape (n_samples_2, ), default=None Pre-computed dot-products of vectors in Y (e.g., ``(Y**2).sum(axis=1)``) May be ignored in some cases, see the note below. - squared : boolean, default=False + squared : bool, default=False Return squared Euclidean distances. X_norm_squared : array-like of shape (n_samples,), default=None @@ -534,10 +534,10 @@ def pairwise_distances_argmin_min(X, Y, *, axis=1, metric="euclidean", Y : {array-like, sparse matrix}, shape (n_samples2, n_features) Arrays containing points. - axis : int, optional, default=1 + axis : int, default=1 Axis along which the argmin and distances are to be computed. - metric : string or callable, default='euclidean' + metric : str or callable, default='euclidean' metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. @@ -623,10 +623,10 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean", Arrays containing points. Respective shapes (n_samples1, n_features) and (n_samples2, n_features) - axis : int, optional, default=1 + axis : int, default=1 Axis along which the argmin and distances are to be computed. - metric : string or callable, default="euclidean" + metric : str or callable, default="euclidean" metric to use for distance computation. Any metric from scikit-learn or scipy.spatial.distance can be used. 
@@ -688,7 +688,7 @@ def haversine_distances(X, Y=None): ---------- X : array-like, shape (n_samples_1, 2) - Y : array_like, shape (n_samples_2, 2), default=None + Y : array-like of shape (n_samples_2, 2), default=None Returns ------- @@ -814,8 +814,8 @@ def cosine_distances(X, Y=None): X : array-like, sparse matrix with shape (n_samples_X, n_features). - Y : array_like, sparse matrix with shape \ - (n_samples_Y, n_features), default=None. + Y : {array-like, sparse matrix} of shape \ + (n_samples_Y, n_features), default=None Returns @@ -990,7 +990,7 @@ def linear_kernel(X, Y=None, dense_output=True): Y : array of shape (n_samples_2, n_features), default=None - dense_output : boolean (optional), default=True + dense_output : bool, default=True Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. @@ -1158,12 +1158,12 @@ def cosine_similarity(X, Y=None, dense_output=True): X : ndarray or sparse array, shape: (n_samples_X, n_features) Input data. - Y : ndarray or sparse array, shape: (n_samples_Y, n_features), \ + Y : {ndarray, sparse matrix} of shape (n_samples_Y, n_features), \ default=None Input data. If ``None``, the output will be the pairwise similarities between all samples in ``X``. - dense_output : boolean (optional), default=True + dense_output : bool, default=True Whether to return dense output even when the input is sparse. If ``False``, the output is sparse if both input arrays are sparse. @@ -1481,7 +1481,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, [n_samples_a, n_features] otherwise Array of pairwise distances between samples, or a feature array. - Y : array [n_samples_b, n_features], default=None + Y : array of shape (n_samples_b, n_features), default=None An optional second feature array. Only allowed if metric != "precomputed". 
@@ -1497,7 +1497,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, If None, pairwise_distances_chunked returns a generator of vertical chunks of the distance matrix. - metric : string, or callable, default='euclidean' + metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or @@ -1508,7 +1508,7 @@ def pairwise_distances_chunked(X, Y=None, *, reduce_func=None, should take two arrays from X as input and return a value indicating the distance between them. - n_jobs : int or None, default=None + n_jobs : int, default=None The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. @@ -1671,11 +1671,11 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, [n_samples_a, n_features] otherwise Array of pairwise distances between samples, or a feature array. - Y : array [n_samples_b, n_features], default=None + Y : array of shape (n_samples_b, n_features), default=None An optional second feature array. Only allowed if metric != "precomputed". - metric : string, or callable, default='euclidean' + metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or @@ -1686,7 +1686,7 @@ def pairwise_distances(X, Y=None, metric="euclidean", *, n_jobs=None, should take two arrays from X as input and return a value indicating the distance between them. - n_jobs : int or None, default=None + n_jobs : int, default=None The number of jobs to use for the computation. This works by breaking down the pairwise matrix into n_jobs even slices and computing them in parallel. 
From dc5cc394b1bf7214409523e8c2d80e74fb5acfc5 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 21:06:28 -0400 Subject: [PATCH 37/42] Make edits --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index d6070b38dfec5..ea2d55229c4a8 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -370,7 +370,7 @@ def multilabel_confusion_matrix(y_true, y_pred, *, sample_weight=None, sample_weight : array-like of shape (n_samples,), default=None Sample weights - labels : array-like, default=None + labels : array-like of shape (n_classes,), default=None A list of classes or column indices to select some (or to force inclusion of classes absent from the data) @@ -641,7 +641,7 @@ def jaccard_score(y_true, y_pred, *, labels=None, pos_label=1, y_pred : 1d array-like, or label indicator array / sparse matrix Predicted labels, as returned by a classifier. - labels : array-like, default=None + labels : array-like of shape (n_classes,), default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a From 711658bd473a23cc0923f9619f866cc27222a854 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 11 Jun 2020 21:13:29 -0400 Subject: [PATCH 38/42] Make edits --- sklearn/metrics/_regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 685297af80cb7..72d7bc465fc3b 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -533,9 +533,9 @@ def r2_score(y_true, y_pred, *, sample_weight=None, sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
- multioutput : {'raw_values', 'uniform_average', 'variance_weigthed'}, None or \ - 'variance_weighted'] or None or array-like of \ - shape (n_outputs), default='uniform_average' + multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} \ + or None or array-like of shape (n_outputs), \ + default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. From 0da8b252e61916b55d9e602e1250190a14051039 Mon Sep 17 00:00:00 2001 From: genvalen Date: Fri, 11 Jun 2021 19:48:33 -0400 Subject: [PATCH 39/42] Fix another merge conflict --- sklearn/metrics/cluster/_supervised.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index b48e61ab4e70f..7814e7ba50e1c 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -101,13 +101,8 @@ def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False, If ``None``, nothing is adjusted. sparse : bool, default=False -<<<<<<< HEAD - If True, return a sparse CSR continency matrix. If ``eps is not None``, - and ``sparse is True``, will throw ValueError. -======= If `True`, return a sparse CSR continency matrix. If `eps` is not `None` and `sparse` is `True` will raise ValueError. ->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5 .. versionadded:: 0.18 @@ -745,14 +740,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): labels_pred : int array-like of shape (n_samples,) A clustering of the data into disjoint subsets. -<<<<<<< HEAD - contingency : {ndarray, sparse matrix} of \ - shape (n_classes_true, n_classes_pred), \ - default=None -======= contingency : {ndarray, sparse matrix} of shape \ (n_classes_true, n_classes_pred), default=None ->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5 A contingency matrix given by the :func:`contingency_matrix` function. 
If value is ``None``, it will be computed, otherwise the given value is used, with ``labels_true`` and ``labels_pred`` ignored. From 0fa9b85af5e6a6a7bc0a9c83759982f65de425e9 Mon Sep 17 00:00:00 2001 From: genvalen Date: Thu, 1 Jul 2021 23:38:51 -0400 Subject: [PATCH 40/42] Accept incoming change from previous merge --- sklearn/metrics/cluster/_unsupervised.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index 7890b97f06a8a..fd4933c1df17a 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -79,11 +79,7 @@ def silhouette_score( on a random subset of the data. If ``sample_size is None``, no sampling is used. -<<<<<<< HEAD - random_state : int or RandomState instance, default=None -======= random_state : int, RandomState instance or None, default=None ->>>>>>> 038c5cd04558e572b6a4dea7383a515ff10090e5 Determines random number generation for selecting a subset of samples. Used when ``sample_size is not None``. Pass an int for reproducible results across multiple function calls. From e0ab80702aeeec0629b19c4fef73de1ac929203b Mon Sep 17 00:00:00 2001 From: genvalen Date: Mon, 5 Jul 2021 22:12:33 -0400 Subject: [PATCH 41/42] Remove Lars from DOCSTRING_IGNORE_LIST. 
--- maint_tools/test_docstrings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py index 9b23b1789aeb4..abd1f3303c518 100644 --- a/maint_tools/test_docstrings.py +++ b/maint_tools/test_docstrings.py @@ -75,7 +75,6 @@ "LabelEncoder", "LabelPropagation", "LabelSpreading", - "Lars", "LarsCV", "LassoCV", "LassoLars", From 4da84c407729da7ab4d53773ea4663e4ff083fa8 Mon Sep 17 00:00:00 2001 From: genvalen Date: Mon, 5 Jul 2021 22:23:14 -0400 Subject: [PATCH 42/42] Ensure Lars passes numpydoc validation --- sklearn/linear_model/_least_angle.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index deec81a29c190..27b0fafd70ecd 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -842,7 +842,7 @@ def _lars_path_solver( class Lars(MultiOutputMixin, RegressorMixin, LinearModel): - """Least Angle Regression model a.k.a. LAR + """Least Angle Regression model a.k.a. LAR. Read more in the :ref:`User Guide <least_angle_regression>`. @@ -940,6 +940,13 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): .. versionadded:: 0.24 + See Also + -------- + lars_path : Compute Least Angle Regression or Lasso + path using LARS algorithm. + LarsCV : Cross-validated Least Angle Regression model. + sklearn.decomposition.sparse_encode : Sparse coding. + Examples -------- >>> from sklearn import linear_model @@ -948,12 +955,6 @@ class Lars(MultiOutputMixin, RegressorMixin, LinearModel): Lars(n_nonzero_coefs=1, normalize=False) >>> print(reg.coef_) [ 0. -1.11...] - - See Also - -------- - lars_path, LarsCV - sklearn.decomposition.sparse_encode - """ method = "lar" @@ -1094,7 +1095,7 @@ def fit(self, X, y, Xy=None): Returns ------- self : object - returns an instance of self. + Returns an instance of self. """ X, y = self._validate_data(X, y, y_numeric=True, multi_output=True)