diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 6693c9896c87a..3d71c1e5abbf5 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -809,6 +809,14 @@ def __init__(self, pattern=None, *, dtype_include=None, self.dtype_exclude = dtype_exclude def __call__(self, df): + """Callable for column selection to be used by a + :class:`ColumnTransformer`. + + Parameters + ---------- + df : dataframe of shape (n_samples, n_features) + DataFrame to select columns from. + """ if not hasattr(df, 'iloc'): raise ValueError("make_column_selector can only be applied to " "pandas dataframes") diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index 6ee5d7adaeb5b..d4331b591e43f 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -632,7 +632,7 @@ def fit(self, X, y=None): Training data, where `n_samples` is the number of samples and `n_features` is the number of features. - y: Ignored + y : Ignored Not used, present for API consistency by convention. Returns diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index cada82996ca22..72b13681200ff 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -135,7 +135,7 @@ def fit(self, X, y=None): Training data, where n_samples is the number of samples and n_features is the number of features. - y: Ignored + y : Ignored Not used, present for API consistency by convention. Returns diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index b4219a5a1d520..42d727b9ae2be 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -537,7 +537,8 @@ class PLSRegression(_PLS): `Y = X @ coef_`. n_iter_ : list of shape (n_components,) - Number of iterations of the power method for each component. 
+ Number of iterations of the power method, for each + component. n_features_in_ : int Number of features seen during :term:`fit`. diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 27261739de621..e2ae9f8355a54 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1123,6 +1123,8 @@ def transform(self, X, y=None): Test data to be transformed, must have the same number of features as the data used to train the model. + y : Ignored + Returns ------- X_new : ndarray of shape (n_samples, n_components) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index d30cd030bf698..15b4c95f8cd54 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -940,14 +940,6 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255. - monotonic_cst : array-like of int of shape (n_features), default=None - Indicates the monotonic constraint to enforce on each feature. -1, 1 - and 0 respectively correspond to a negative constraint, positive - constraint and no constraint. Read more in the :ref:`User Guide - `. - - .. versionadded:: 0.23 - categorical_features : array-like of {bool, int} of shape (n_features) \ or shape (n_categorical_features,), default=None. Indicates the categorical features. @@ -964,6 +956,14 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): .. versionadded:: 0.24 + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonic constraint to enforce on each feature. 
-1, 1 + and 0 respectively correspond to a negative constraint, positive + constraint and no constraint. Read more in the :ref:`User Guide + `. + + .. versionadded:: 0.23 + warm_start : bool, default=False When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the @@ -1193,14 +1193,6 @@ class HistGradientBoostingClassifier(ClassifierMixin, Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255. - monotonic_cst : array-like of int of shape (n_features), default=None - Indicates the monotonic constraint to enforce on each feature. -1, 1 - and 0 respectively correspond to a negative constraint, positive - constraint and no constraint. Read more in the :ref:`User Guide - `. - - .. versionadded:: 0.23 - categorical_features : array-like of {bool, int} of shape (n_features) \ or shape (n_categorical_features,), default=None. Indicates the categorical features. @@ -1217,6 +1209,14 @@ class HistGradientBoostingClassifier(ClassifierMixin, .. versionadded:: 0.24 + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonic constraint to enforce on each feature. -1, 1 + and 0 respectively correspond to a negative constraint, positive + constraint and no constraint. Read more in the :ref:`User Guide + `. + + .. versionadded:: 0.23 + warm_start : bool, default=False When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. 
For results to be valid, the diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 271bc0062ef6b..7ee6b043a0df1 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -36,7 +36,7 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, to select. If float between 0 and 1, it is the fraction of features to select. - direction: {'forward', 'backward'}, default='forward' + direction : {'forward', 'backward'}, default='forward' Whether to perform forward selection or backward selection. scoring : str, callable, list/tuple or dict, default=None diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 68797b176727b..7d98f7734b322 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -593,6 +593,10 @@ class TweedieRegressor(GeneralizedLinearRegressor): GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). + fit_intercept : bool, default=True + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X @ coef + intercept). + link : {'auto', 'identity', 'log'}, default='auto' The link function of the GLM, i.e. mapping from linear predictor `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets @@ -601,10 +605,6 @@ class TweedieRegressor(GeneralizedLinearRegressor): - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions - fit_intercept : bool, default=True - Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X @ coef + intercept). - max_iter : int, default=100 The maximal number of iterations for the solver. 
diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py index d9f642e38052a..e512b2d972ce6 100644 --- a/sklearn/metrics/_plot/det_curve.py +++ b/sklearn/metrics/_plot/det_curve.py @@ -22,8 +22,8 @@ class DetCurveDisplay: fpr : ndarray False positive rate. - tpr : ndarray - True positive rate. + fnr : ndarray + False negative rate. estimator_name : str, default=None Name of estimator. If None, the estimator name is not shown. diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index e0baf77e1f22d..e9c498816eae2 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -1024,42 +1024,6 @@ class GridSearchCV(BaseSearchCV): .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None - pre_dispatch : int, or str, default=n_jobs - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A str, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - cv : int, cross-validation generator or an iterable, default=None - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 5-fold cross validation, - - integer, to specify the number of folds in a `(Stratified)KFold`, - - :term:`CV splitter`, - - An iterable yielding (train, test) splits as arrays of indices. - - For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, :class:`StratifiedKFold` is used. In all - other cases, :class:`KFold` is used. 
- - Refer :ref:`User Guide ` for the various - cross-validation strategies that can be used here. - - .. versionchanged:: 0.22 - ``cv`` default value if None changed from 3-fold to 5-fold. - refit : bool, str, or callable, default=True Refit an estimator using the best found parameters on the whole dataset. @@ -1090,6 +1054,25 @@ class GridSearchCV(BaseSearchCV): .. versionchanged:: 0.20 Support for callable added. + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - integer, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + verbose : int Controls the verbosity: the higher, the more messages. @@ -1099,6 +1082,23 @@ class GridSearchCV(BaseSearchCV): - >3 : the fold and candidate parameter indexes are also displayed together with the starting time of the computation. + pre_dispatch : int, or str, default=n_jobs + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. 
Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A str, giving an expression as a function of n_jobs, + as in '2*n_jobs' + error_score : 'raise' or numeric, default=np.nan Value to assign to the score if an error occurs in estimator fitting. If set to 'raise', the error is raised. If a numeric value is given, @@ -1366,42 +1366,6 @@ class RandomizedSearchCV(BaseSearchCV): .. versionchanged:: v0.20 `n_jobs` default changed from 1 to None - pre_dispatch : int, or str, default=None - Controls the number of jobs that get dispatched during parallel - execution. Reducing this number can be useful to avoid an - explosion of memory consumption when more jobs get dispatched - than CPUs can process. This parameter can be: - - - None, in which case all the jobs are immediately - created and spawned. Use this for lightweight and - fast-running jobs, to avoid delays due to on-demand - spawning of the jobs - - - An int, giving the exact number of total jobs that are - spawned - - - A str, giving an expression as a function of n_jobs, - as in '2*n_jobs' - - cv : int, cross-validation generator or an iterable, default=None - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - - None, to use the default 5-fold cross validation, - - integer, to specify the number of folds in a `(Stratified)KFold`, - - :term:`CV splitter`, - - An iterable yielding (train, test) splits as arrays of indices. - - For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, :class:`StratifiedKFold` is used. In all - other cases, :class:`KFold` is used. - - Refer :ref:`User Guide ` for the various - cross-validation strategies that can be used here. - - .. versionchanged:: 0.22 - ``cv`` default value if None changed from 3-fold to 5-fold. 
- refit : bool, str, or callable, default=True Refit an estimator using the best found parameters on the whole dataset. @@ -1432,9 +1396,45 @@ class RandomizedSearchCV(BaseSearchCV): .. versionchanged:: 0.20 Support for callable added. + cv : int, cross-validation generator or an iterable, default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross validation, + - integer, to specify the number of folds in a `(Stratified)KFold`, + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if the estimator is a classifier and ``y`` is + either binary or multiclass, :class:`StratifiedKFold` is used. In all + other cases, :class:`KFold` is used. + + Refer :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + verbose : int Controls the verbosity: the higher, the more messages. + pre_dispatch : int, or str, default=None + Controls the number of jobs that get dispatched during parallel + execution. Reducing this number can be useful to avoid an + explosion of memory consumption when more jobs get dispatched + than CPUs can process. This parameter can be: + + - None, in which case all the jobs are immediately + created and spawned. Use this for lightweight and + fast-running jobs, to avoid delays due to on-demand + spawning of the jobs + + - An int, giving the exact number of total jobs that are + spawned + + - A str, giving an expression as a function of n_jobs, + as in '2*n_jobs' + random_state : int, RandomState instance or None, default=None Pseudo random number generator state used for random uniform sampling from lists of possible values instead of scipy.stats distributions. 
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index c84716130ed05..92a4135147b87 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -244,7 +244,7 @@ class MinMaxScaler(TransformerMixin, BaseEstimator): Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array). - clip: bool, default=False + clip : bool, default=False Set to True to clip transformed values of held-out data to provided `feature range`. diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index 8c79065c830d1..54fa9ba45e1b8 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -41,6 +41,11 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): Invoking the ``fit`` method will fit a clone of the passed estimator, which will be stored in the ``base_estimator_`` attribute. + threshold : float, default=0.75 + The decision threshold for use with `criterion='threshold'`. + Should be in [0, 1). When using the 'threshold' criterion, a + :ref:`well calibrated classifier <calibration>` should be used. + criterion : {'threshold', 'k_best'}, default='threshold' The selection criterion used to select which labels to add to the training set. If 'threshold', pseudo-labels with prediction @@ -49,11 +54,6 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): added to the dataset. When using the 'threshold' criterion, a :ref:`well calibrated classifier <calibration>` should be used. - threshold : float, default=0.75 - The decision threshold for use with `criterion='threshold'`. - Should be in [0, 1). When using the 'threshold' criterion, a - :ref:`well calibrated classifier <calibration>` should be used. - k_best : int, default=10 The amount of samples to add in each iteration. Only used when `criterion` is k_best'. 
@@ -64,7 +64,7 @@ class SelfTrainingClassifier(MetaEstimatorMixin, BaseEstimator): until no new pseudo-labels are added, or all unlabeled samples have been labeled. - verbose: bool, default=False + verbose : bool, default=False Enable verbose output. Attributes diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 2328b8d84c84e..cd2bdba449799 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -83,8 +83,9 @@ def test_docstring_parameters(): with warnings.catch_warnings(record=True): module = importlib.import_module(name) classes = inspect.getmembers(module, inspect.isclass) - # Exclude imported classes - classes = [cls for cls in classes if cls[1].__module__ == name] + # Exclude non-scikit-learn classes + classes = [cls for cls in classes + if cls[1].__module__.startswith('sklearn')] for cname, cls in classes: this_incorrect = [] if cname in _DOCSTRING_IGNORES or cname.startswith('_'):