diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py index 10e4f27f5490e..157083c010390 100644 --- a/sklearn/linear_model/_perceptron.py +++ b/sklearn/linear_model/_perceptron.py @@ -12,25 +12,25 @@ class Perceptron(BaseSGDClassifier): Parameters ---------- - penalty : None, 'l2' or 'l1' or 'elasticnet' - The penalty (aka regularization term) to be used. Defaults to None. + penalty : {'l2', 'l1', 'elasticnet'}, default=None + The penalty (aka regularization term) to be used. - alpha : float + alpha : float, default=0.0001 Constant that multiplies the regularization term if regularization is - used. Defaults to 0.0001 + used. - fit_intercept : bool + fit_intercept : bool, default=True Whether the intercept should be estimated or not. If False, the - data is assumed to be already centered. Defaults to True. + data is assumed to be already centered. - max_iter : int, optional (default=1000) + max_iter : int, default=1000 The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method. .. versionadded:: 0.19 - tol : float or None, optional (default=1e-3) + tol : float, default=1e-3 The stopping criterion. If it is not None, the iterations will stop when (loss > previous_loss - tol). @@ -39,20 +39,20 @@ class Perceptron(BaseSGDClassifier): shuffle : bool, default=True Whether or not the training data should be shuffled after each epoch. - verbose : integer, default=0 + verbose : int, default=0 The verbosity level - eta0 : double - Constant by which the updates are multiplied. Defaults to 1. + eta0 : double, default=1 + Constant by which the updates are multiplied. - n_jobs : int or None, optional (default=None) + n_jobs : int, default=None The number of CPUs to use to do the OVA (One Versus All, for multi-class problems) computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>` for more details. - random_state : int, RandomState instance or None, optional, default None + random_state : int, RandomState instance, default=None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number @@ -80,7 +80,7 @@ class Perceptron(BaseSGDClassifier): .. versionadded:: 0.20 - class_weight : dict, {class_label: weight} or "balanced" or None, optional + class_weight : dict, {class_label: weight} or "balanced", default=None Preset for the class_weight fit parameter. Weights associated with classes. If not given, all classes @@ -97,18 +97,18 @@ class Perceptron(BaseSGDClassifier): Attributes ---------- - coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\ - n_features] + coef_ : ndarray of shape (1, n_features) if n_classes == 2 else \ + (n_classes, n_features) Weights assigned to the features. - intercept_ : array, shape = [1] if n_classes == 2 else [n_classes] + intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,) Constants in decision function. n_iter_ : int The actual number of iterations to reach the stopping criterion. For multiclass fits, it is the maximum over every binary fit. - classes_ : array of shape (n_classes,) + classes_ : ndarray of shape (n_classes,) The unique classes labels.
t_ : int diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 2a24fba4675a5..ec2f29dbb2317 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -245,11 +245,11 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', Parameters ---------- - X : {array-like, sparse matrix, LinearOperator} of shape \ + X : {ndarray, sparse matrix, LinearOperator} of shape \ (n_samples, n_features) Training data - y : array-like of shape (n_samples,) or (n_samples, n_targets) + y : ndarray of shape (n_samples,) or (n_samples, n_targets) Target values alpha : float or array-like of shape (n_targets,) @@ -268,7 +268,8 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', .. versionadded:: 0.17 - solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'} + solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \ + default='auto' Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. @@ -308,7 +309,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', .. versionadded:: 0.19 SAGA solver. - max_iter : int, optional + max_iter : int, default=None Maximum number of iterations for conjugate gradient solver. For the 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' and saga solver, the default value is @@ -321,7 +322,7 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', Verbosity level. Setting verbose > 0 will display additional information depending on the solver used. - random_state : int, RandomState instance or None, optional, default None + random_state : int, RandomState instance, default=None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number @@ -349,14 +350,14 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto', Returns ------- - coef : array of shape (n_features,) or (n_targets, n_features) + coef : ndarray of shape (n_features,) or (n_targets, n_features) Weight vector(s). n_iter : int, optional The actual number of iteration performed by the solver. Only returned if `return_n_iter` is True. - intercept : float or array of shape (n_targets,) + intercept : float or ndarray of shape (n_targets,) The intercept of the model. Only returned if `return_intercept` is True and if X is a scipy sparse array. @@ -618,7 +619,7 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge): Parameters ---------- - alpha : {float, array-like of shape (n_targets,)}, default=1.0 + alpha : {float, ndarray of shape (n_targets,)}, default=1.0 Regularization strength; must be a positive float. Regularization improves the conditioning of the problem and reduces the variance of the estimates. Larger values specify stronger regularization. @@ -643,7 +644,7 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge): copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. - max_iter : int, optional + max_iter : int, default=None Maximum number of iterations for conjugate gradient solver. For 'sparse_cg' and 'lsqr' solvers, the default value is determined by scipy.sparse.linalg. For 'sag' solver, the default value is 1000. @@ -651,7 +652,8 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge): tol : float, default=1e-3 Precision of the solution. 
- solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'} + solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \ + default='auto' Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. @@ -688,7 +690,7 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge): .. versionadded:: 0.19 SAGA solver. - random_state : int, RandomState instance or None, optional, default None + random_state : int, RandomState instance, default=None The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number @@ -700,14 +702,14 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge): Attributes ---------- - coef_ : array of shape (n_features,) or (n_targets, n_features) + coef_ : ndarray of shape (n_features,) or (n_targets, n_features) Weight vector(s). - intercept_ : float or array of shape (n_targets,) + intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. - n_iter_ : None or array of shape (n_targets,) + n_iter_ : None or ndarray of shape (n_targets,) Actual number of iterations for each target. Available only for sag and lsqr solvers. Other solvers will return None. @@ -747,13 +749,13 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) + X : {ndarray, sparse matrix} of shape (n_samples, n_features) Training data - y : array-like of shape (n_samples,) or (n_samples, n_targets) + y : ndarray of shape (n_samples,) or (n_samples, n_targets) Target values - sample_weight : float or array-like of shape (n_samples,), default=None + sample_weight : float or ndarray of shape (n_samples,), default=None Individual weights for each sample. If given a float, every sample will have the same weight. @@ -798,14 +800,14 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): copy_X : bool, default=True If True, X will be copied; else, it may be overwritten. - max_iter : int, optional + max_iter : int, default=None Maximum number of iterations for conjugate gradient solver. The default value is determined by scipy.sparse.linalg. tol : float, default=1e-3 Precision of the solution. - class_weight : dict or 'balanced', optional + class_weight : dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. @@ -813,7 +815,8 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. - solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'} + solver : {'auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'}, \ + default='auto' Solver to use in the computational routines: - 'auto' chooses the solver automatically based on the type of data. @@ -847,7 +850,7 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): .. versionadded:: 0.19 SAGA solver. - random_state : int, RandomState instance or None, default=None + random_state : int, RandomState instance, default=None The seed of the pseudo random number generator to use when shuffling the data. 
If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number @@ -856,20 +859,20 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge): Attributes ---------- - coef_ : array of shape (1, n_features) or (n_classes, n_features) + coef_ : ndarray of shape (1, n_features) or (n_classes, n_features) Coefficient of the features in the decision function. ``coef_`` is of shape (1, n_features) when the given problem is binary. - intercept_ : float or array of shape (n_targets,) + intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. - n_iter_ : None or array of shape (n_targets,) + n_iter_ : None or ndarray of shape (n_targets,) Actual number of iterations for each target. Available only for sag and lsqr solvers. Other solvers will return None. - classes_ : array of shape (n_classes,) + classes_ : ndarray of shape (n_classes,) The classes labels. See Also @@ -907,13 +910,13 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) + X : {ndarray, sparse matrix} of shape (n_samples, n_features) Training data. - y : array-like of shape (n_samples,) + y : ndarray of shape (n_samples,) Target values. - sample_weight : float or array-like of shape (n_samples,), default=None + sample_weight : float or ndarray of shape (n_samples,), default=None Individual weights for each sample. If given a float, every sample will have the same weight. @@ -1135,7 +1138,7 @@ def _compute_gram(self, X, sqrt_sw): Parameters ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) + X : {ndarray, sparse matrix} of shape (n_samples, n_features) The preprocessed design matrix. sqrt_sw : ndarray of shape (n_samples,) @@ -1425,13 +1428,13 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) + X : {ndarray, sparse matrix} of shape (n_samples, n_features) Training data. Will be cast to float64 if necessary. - y : array-like of shape (n_samples,) or (n_samples, n_targets) + y : ndarray of shape (n_samples,) or (n_samples, n_targets) Target values. Will be cast to float64 if necessary. - sample_weight : float or array-like of shape (n_samples,), default=None + sample_weight : float or ndarray of shape (n_samples,), default=None Individual weights for each sample. If given a float, every sample will have the same weight. @@ -1543,14 +1546,14 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : array-like of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) Training data. If using GCV, will be cast to float64 if necessary. - y : array-like of shape (n_samples,) or (n_samples, n_targets) + y : ndarray of shape (n_samples,) or (n_samples, n_targets) Target values. Will be cast to X's dtype if necessary. - sample_weight : float or array-like of shape (n_samples,), default=None + sample_weight : float or ndarray of shape (n_samples,), default=None Individual weights for each sample. If given a float, every sample will have the same weight. @@ -1634,14 +1637,14 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. 
- scoring : string, callable or None, default=None + scoring : string, callable, default=None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. If None, the negative mean squared error if cv is 'auto' or None (i.e. when using generalized cross-validation), and r2 score otherwise. - cv : int, cross-validation generator or an iterable, optional + cv : int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1658,7 +1661,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. - gcv_mode : {None, 'auto', 'svd', eigen'}, optional + gcv_mode : {'auto', 'svd', 'eigen'}, default='auto' Flag indicating which strategy to use when performing Generalized Cross-Validation. Options are:: @@ -1670,7 +1673,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): The 'auto' mode is the default and is intended to pick the cheaper option of the two depending on the shape of the training data. - store_cv_values : boolean, default=False + store_cv_values : bool, default=False Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using @@ -1678,17 +1681,17 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): Attributes ---------- - cv_values_ : array of shape (n_samples, n_alphas) or \ shape (n_samples, n_targets, n_alphas), optional + cv_values_ : ndarray of shape (n_samples, n_alphas) or \ shape (n_samples, n_targets, n_alphas), optional Cross-validation values for each alpha (if ``store_cv_values=True``\ and ``cv=None``). After ``fit()`` has been called, this attribute \ will contain the mean squared errors (by default) or the values \ of the ``{loss,score}_func`` function (if provided in the constructor). - coef_ : array of shape (n_features) or (n_targets, n_features) + coef_ : ndarray of shape (n_features) or (n_targets, n_features) Weight vector(s). - intercept_ : float or array of shape (n_targets,) + intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. @@ -1750,12 +1753,12 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on an estimator with ``normalize=False``. - scoring : string, callable or None, default=None + scoring : string, callable, default=None A string (see model evaluation documentation) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. - cv : int, cross-validation generator or an iterable, optional + cv : int, cross-validation generator or an iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -1767,7 +1770,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): Refer :ref:`User Guide <cross_validation>` for the various cross-validation strategies that can be used here. - class_weight : dict or 'balanced', optional + class_weight : dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one.
@@ -1775,7 +1778,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))`` - store_cv_values : boolean, default=False + store_cv_values : bool, default=False Flag indicating if the cross-validation values corresponding to each alpha should be stored in the ``cv_values_`` attribute (see below). This flag is only compatible with ``cv=None`` (i.e. using @@ -1783,19 +1786,19 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): Attributes ---------- - cv_values_ : array of shape (n_samples, n_targets, n_alphas), optional + cv_values_ : ndarray of shape (n_samples, n_targets, n_alphas), optional Cross-validation values for each alpha (if ``store_cv_values=True`` and ``cv=None``). After ``fit()`` has been called, this attribute will contain the mean squared errors (by default) or the values of the ``{loss,score}_func`` function (if provided in the constructor). This attribute exists only when ``store_cv_values`` is True. - coef_ : array of shape (1, n_features) or (n_targets, n_features) + coef_ : ndarray of shape (1, n_features) or (n_targets, n_features) Coefficient of the features in the decision function. ``coef_`` is of shape (1, n_features) when the given problem is binary. - intercept_ : float or array of shape (n_targets,) + intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. Set to 0.0 if ``fit_intercept = False``. @@ -1805,7 +1808,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): best_score_ : float Score of base estimator with best alpha. - classes_ : array of shape (n_classes,) + classes_ : ndarray of shape (n_classes,) The classes labels. Examples @@ -1843,15 +1846,15 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : array-like of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) Training vectors, where n_samples is the number of samples and n_features is the number of features. When using GCV, will be cast to float64 if necessary. - y : array-like of shape (n_samples,) + y : ndarray of shape (n_samples,) Target values. Will be cast to X's dtype if necessary. - sample_weight : float or array-like of shape (n_samples,), default=None + sample_weight : float or ndarray of shape (n_samples,), default=None Individual weights for each sample. If given a float, every sample will have the same weight. diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index b6367d32e57a9..51af0e33139dd 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -140,13 +140,13 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas, Parameters ---------- - packed_coef_inter : array-like + packed_coef_inter : ndarray A vector comprising the flattened coefficients and intercepts. - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. - y : array-like, shape (n_samples,) + y : ndarray of shape (n_samples,) The target values. activations : list, length = n_layers - 1 @@ -185,10 +185,10 @@ def _backprop(self, X, y, activations, deltas, coef_grads, Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. 
- y : array-like, shape (n_samples,) + y : ndarray of shape (n_samples,) The target values. activations : list, length = n_layers - 1 @@ -613,10 +613,10 @@ def fit(self, X, y): Parameters ---------- - X : array-like or sparse matrix, shape (n_samples, n_features) + X : ndarray or sparse matrix of shape (n_samples, n_features) The input data. - y : array-like, shape (n_samples,) or (n_samples, n_outputs) + y : ndarray of shape (n_samples,) or (n_samples, n_outputs) The target values (class labels in classification, real numbers in regression). @@ -632,10 +632,10 @@ def partial_fit(self): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. - y : array-like, shape (n_samples,) + y : ndarray of shape (n_samples,) The target values. Returns ------- @@ -656,12 +656,12 @@ def _predict(self, X): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. Returns ------- - y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs) + y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs) The decision function of the samples for each class in the model. """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) @@ -698,11 +698,11 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): Parameters ---------- - hidden_layer_sizes : tuple, length = n_layers - 2, default (100,) + hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,) The ith element represents the number of neurons in the ith hidden layer. - activation : {'identity', 'logistic', 'tanh', 'relu'}, default 'relu' + activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu' Activation function for the hidden layer. - 'identity', no-op activation, useful to implement linear bottleneck, @@ -717,7 +717,7 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): - 'relu', the rectified linear unit function, returns f(x) = max(0, x) - solver : {'lbfgs', 'sgd', 'adam'}, default 'adam' + solver : {'lbfgs', 'sgd', 'adam'}, default='adam' The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. @@ -733,15 +733,15 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): For small datasets, however, 'lbfgs' can converge faster and perform better. - alpha : float, optional, default 0.0001 + alpha : float, default=0.0001 L2 penalty (regularization term) parameter. - batch_size : int, optional, default 'auto' + batch_size : int, default='auto' Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)` - learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant' + learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant' Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by @@ -759,55 +759,55 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): Only used when ``solver='sgd'``. - learning_rate_init : double, optional, default 0.001 + learning_rate_init : double, default=0.001 The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'. - power_t : double, optional, default 0.5 + power_t : double, default=0.5 The exponent for inverse scaling learning rate.
It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'. - max_iter : int, optional, default 200 + max_iter : int, default=200 Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps. - shuffle : bool, optional, default True + shuffle : bool, default=True Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'. - random_state : int, RandomState instance or None, optional, default None + random_state : int, RandomState instance, default=None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - tol : float, optional, default 1e-4 + tol : float, default=1e-4 Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops. - verbose : bool, optional, default False + verbose : bool, default=False Whether to print progress messages to stdout. - warm_start : bool, optional, default False + warm_start : bool, default=False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`. - momentum : float, default 0.9 + momentum : float, default=0.9 Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'. - nesterovs_momentum : boolean, default True + nesterovs_momentum : bool, default=True Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0. - early_stopping : bool, default False + early_stopping : bool, default=False Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when @@ -816,29 +816,29 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): except in a multilabel setting. Only effective when solver='sgd' or 'adam' - validation_fraction : float, optional, default 0.1 + validation_fraction : float, default=0.1 The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True - beta_1 : float, optional, default 0.9 + beta_1 : float, default=0.9 Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam' - beta_2 : float, optional, default 0.999 + beta_2 : float, default=0.999 Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver='adam' - epsilon : float, optional, default 1e-8 + epsilon : float, default=1e-8 Value for numerical stability in adam. Only used when solver='adam' - n_iter_no_change : int, optional, default 10 + n_iter_no_change : int, default=10 Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20 - max_fun : int, optional, default 15000 + max_fun : int, default=15000 Only used when solver='lbfgs'.
Maximum number of loss function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of loss function calls. @@ -849,7 +849,7 @@ class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron): Attributes ---------- - classes_ : array or list of array of shape (n_classes,) + classes_ : ndarray or list of ndarray of shape (n_classes,) Class labels for each output. loss_ : float @@ -959,12 +959,12 @@ def predict(self, X): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. Returns ------- - y : array-like, shape (n_samples,) or (n_samples, n_classes) + y : ndarray of shape (n_samples,) or (n_samples, n_classes) The predicted classes. """ check_is_fitted(self) @@ -980,10 +980,10 @@ def fit(self, X, y): Parameters ---------- - X : array-like or sparse matrix, shape (n_samples, n_features) + X : ndarray or sparse matrix of shape (n_samples, n_features) The input data. - y : array-like, shape (n_samples,) or (n_samples, n_outputs) + y : ndarray of shape (n_samples,) or (n_samples, n_outputs) The target values (class labels in classification, real numbers in regression). @@ -1041,12 +1041,12 @@ def predict_log_proba(self, X): Parameters ---------- - X : array-like, shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) The input data. Returns ------- - log_y_prob : array-like, shape (n_samples, n_classes) + log_y_prob : ndarray of shape (n_samples, n_classes) The predicted log-probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. Equivalent to log(predict_proba(X)) @@ -1059,12 +1059,12 @@ def predict_proba(self, X): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. Returns ------- - y_prob : array-like, shape (n_samples, n_classes) + y_prob : ndarray of shape (n_samples, n_classes) The predicted probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ @@ -1090,11 +1090,11 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron): Parameters ---------- - hidden_layer_sizes : tuple, length = n_layers - 2, default (100,) + hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,) The ith element represents the number of neurons in the ith hidden layer. - activation : {'identity', 'logistic', 'tanh', 'relu'}, default 'relu' + activation : {'identity', 'logistic', 'tanh', 'relu'}, default='relu' Activation function for the hidden layer. - 'identity', no-op activation, useful to implement linear bottleneck, @@ -1109,7 +1109,7 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron): - 'relu', the rectified linear unit function, returns f(x) = max(0, x) - solver : {'lbfgs', 'sgd', 'adam'}, default 'adam' + solver : {'lbfgs', 'sgd', 'adam'}, default='adam' The solver for weight optimization. - 'lbfgs' is an optimizer in the family of quasi-Newton methods. @@ -1125,15 +1125,15 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron): For small datasets, however, 'lbfgs' can converge faster and perform better. - alpha : float, optional, default 0.0001 + alpha : float, default=0.0001 L2 penalty (regularization term) parameter.
- batch_size : int, optional, default 'auto' + batch_size : int, default='auto' Size of minibatches for stochastic optimizers. If the solver is 'lbfgs', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)` - learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant' + learning_rate : {'constant', 'invscaling', 'adaptive'}, default='constant' Learning rate schedule for weight updates. - 'constant' is a constant learning rate given by @@ -1151,55 +1151,55 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron): Only used when solver='sgd'. - learning_rate_init : double, optional, default 0.001 + learning_rate_init : double, default=0.001 The initial learning rate used. It controls the step-size in updating the weights. Only used when solver='sgd' or 'adam'. - power_t : double, optional, default 0.5 + power_t : double, default=0.5 The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to 'invscaling'. Only used when solver='sgd'. - max_iter : int, optional, default 200 + max_iter : int, default=200 Maximum number of iterations. The solver iterates until convergence (determined by 'tol') or this number of iterations. For stochastic solvers ('sgd', 'adam'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps. - shuffle : bool, optional, default True + shuffle : bool, default=True Whether to shuffle samples in each iteration. Only used when solver='sgd' or 'adam'. - random_state : int, RandomState instance or None, optional, default None + random_state : int, RandomState instance, default=None If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - tol : float, optional, default 1e-4 + tol : float, default=1e-4 Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to 'adaptive', convergence is considered to be reached and training stops. - verbose : bool, optional, default False + verbose : bool, default=False Whether to print progress messages to stdout. - warm_start : bool, optional, default False + warm_start : bool, default=False When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`. - momentum : float, default 0.9 + momentum : float, default=0.9 Momentum for gradient descent update. Should be between 0 and 1. Only used when solver='sgd'. - nesterovs_momentum : boolean, default True + nesterovs_momentum : bool, default=True Whether to use Nesterov's momentum. Only used when solver='sgd' and momentum > 0. - early_stopping : bool, default False + early_stopping : bool, default=False Whether to use early stopping to terminate training when validation score is not improving. If set to true, it will automatically set aside 10% of training data as validation and terminate training when @@ -1207,29 +1207,29 @@ class MLPRegressor(RegressorMixin, BaseMultilayerPerceptron): ``n_iter_no_change`` consecutive epochs.
Only effective when solver='sgd' or 'adam' - validation_fraction : float, optional, default 0.1 + validation_fraction : float, default=0.1 The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True - beta_1 : float, optional, default 0.9 + beta_1 : float, default=0.9 Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver='adam' - beta_2 : float, optional, default 0.999 + beta_2 : float, default=0.999 Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver='adam' - epsilon : float, optional, default 1e-8 + epsilon : float, default=1e-8 Value for numerical stability in adam. Only used when solver='adam' - n_iter_no_change : int, optional, default 10 + n_iter_no_change : int, default=10 Maximum number of epochs to not meet ``tol`` improvement. Only effective when solver='sgd' or 'adam' .. versionadded:: 0.20 - max_fun : int, optional, default 15000 + max_fun : int, default=15000 Only used when solver='lbfgs'. Maximum number of function calls. The solver iterates until convergence (determined by 'tol'), number of iterations reaches max_iter, or this number of function calls. @@ -1321,12 +1321,12 @@ def predict(self, X): Parameters ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The input data. Returns ------- - y : array-like, shape (n_samples, n_outputs) + y : ndarray of shape (n_samples, n_outputs) The predicted values. """ check_is_fitted(self) diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index efe3aeda951af..14960a8b2bb22 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -38,25 +38,25 @@ class BernoulliRBM(TransformerMixin, BaseEstimator): Parameters ---------- - n_components : int, optional + n_components : int, default=256 Number of binary hidden units. - learning_rate : float, optional + learning_rate : float, default=0.1 The learning rate for weight updates. It is *highly* recommended to tune this hyper-parameter. Reasonable values are in the 10**[0., -3.] range. - batch_size : int, optional + batch_size : int, default=10 Number of examples per minibatch. - n_iter : int, optional + n_iter : int, default=10 Number of iterations/sweeps over the training dataset to perform during training. - verbose : int, optional + verbose : int, default=0 The verbosity level. The default, zero, means silent mode. - random_state : integer or RandomState, optional + random_state : int or RandomState instance, default=None A random number generator instance to define the state of the random permutations generator. If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. @@ -113,12 +113,12 @@ def transform(self, X): Parameters ---------- - X : {array-like, sparse matrix} shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) The data to be transformed. Returns ------- - h : array, shape (n_samples, n_components) + h : ndarray of shape (n_samples, n_components) Latent representations of the data. """ check_is_fitted(self) @@ -131,12 +131,12 @@ def _mean_hiddens(self, v): Parameters ---------- - v : array-like, shape (n_samples, n_features) + v : ndarray of shape (n_samples, n_features) Values of the visible layer.
Returns ------- - h : array-like, shape (n_samples, n_components) + h : ndarray of shape (n_samples, n_components) Corresponding mean field values for the hidden layer. """ p = safe_sparse_dot(v, self.components_.T) @@ -148,7 +148,7 @@ def _sample_hiddens(self, v, rng): Parameters ---------- - v : array-like, shape (n_samples, n_features) + v : ndarray of shape (n_samples, n_features) Values of the visible layer to sample from. rng : RandomState @@ -156,7 +156,7 @@ def _sample_hiddens(self, v, rng): Returns ------- - h : array-like, shape (n_samples, n_components) + h : ndarray of shape (n_samples, n_components) Values of the hidden layer. """ p = self._mean_hiddens(v) @@ -167,7 +167,7 @@ def _sample_visibles(self, h, rng): Parameters ---------- - h : array-like, shape (n_samples, n_components) + h : ndarray of shape (n_samples, n_components) Values of the hidden layer to sample from. rng : RandomState @@ -175,7 +175,7 @@ def _sample_visibles(self, h, rng): Returns ------- - v : array-like, shape (n_samples, n_features) + v : ndarray of shape (n_samples, n_features) Values of the visible layer. """ p = np.dot(h, self.components_) @@ -188,12 +188,12 @@ def _free_energy(self, v): Parameters ---------- - v : array-like, shape (n_samples, n_features) + v : ndarray of shape (n_samples, n_features) Values of the visible layer. Returns ------- - free_energy : array-like, shape (n_samples,) + free_energy : ndarray of shape (n_samples,) The value of the free energy. """ return (- safe_sparse_dot(v, self.intercept_visible_) @@ -205,12 +205,12 @@ def gibbs(self, v): Parameters ---------- - v : array-like, shape (n_samples, n_features) + v : ndarray of shape (n_samples, n_features) Values of the visible layer to start from. Returns ------- - v_new : array-like, shape (n_samples, n_features) + v_new : ndarray of shape (n_samples, n_features) Values of the visible layer after one Gibbs step. """ check_is_fitted(self) @@ -227,7 +227,7 @@ def partial_fit(self, X, y=None): Parameters ---------- - X : array-like, shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) Training data. Returns @@ -263,7 +263,7 @@ def _fit(self, v_pos, rng): Parameters ---------- - v_pos : array-like, shape (n_samples, n_features) + v_pos : ndarray of shape (n_samples, n_features) The data to use for training. rng : RandomState @@ -290,12 +290,12 @@ def score_samples(self, X): Parameters ---------- - X : {array-like, sparse matrix} shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) Values of the visible layer. Must be all-boolean (not checked). Returns ------- - pseudo_likelihood : array-like, shape (n_samples,) + pseudo_likelihood : ndarray of shape (n_samples,) Value of the pseudo-likelihood (proxy for likelihood). Notes @@ -328,7 +328,7 @@ def fit(self, X, y=None): Parameters ---------- - X : {array-like, sparse matrix} shape (n_samples, n_features) + X : {array-like, sparse matrix} of shape (n_samples, n_features) Training data. Returns diff --git a/sklearn/neural_network/_stochastic_optimizers.py b/sklearn/neural_network/_stochastic_optimizers.py index 3e49e94de8bd1..02fc53a7aecc2 100644 --- a/sklearn/neural_network/_stochastic_optimizers.py +++ b/sklearn/neural_network/_stochastic_optimizers.py @@ -16,7 +16,7 @@ class BaseOptimizer: The concatenated list containing coefs_ and intercepts_ in MLP model. 
Used for initializing velocities and updating params - learning_rate_init : float, optional, default 0.1 + learning_rate_init : float, default=0.1 The initial learning rate used. It controls the step-size in updating the weights @@ -80,11 +80,11 @@ class SGDOptimizer(BaseOptimizer): The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params - learning_rate_init : float, optional, default 0.1 + learning_rate_init : float, default=0.1 The initial learning rate used. It controls the step-size in updating the weights - lr_schedule : {'constant', 'adaptive', 'invscaling'}, default 'constant' + lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant' Learning rate schedule for weight updates. -'constant', is a constant learning rate given by @@ -100,10 +100,10 @@ class SGDOptimizer(BaseOptimizer): tol, or fail to increase validation score by tol if 'early_stopping' is on, the current learning rate is divided by 5. - momentum : float, optional, default 0.9 + momentum : float, default=0.9 Value of momentum used, must be larger than or equal to 0 - nesterov : bool, optional, default True + nesterov : bool, default=True Whether to use nesterov's momentum or not. Use nesterov's if True Attributes @@ -192,19 +192,19 @@ class AdamOptimizer(BaseOptimizer): The concatenated list containing coefs_ and intercepts_ in MLP model. Used for initializing velocities and updating params - learning_rate_init : float, optional, default 0.1 + learning_rate_init : float, default=0.1 The initial learning rate used. It controls the step-size in updating the weights - beta_1 : float, optional, default 0.9 + beta_1 : float, default=0.9 Exponential decay rate for estimates of first moment vector, should be in [0, 1) - beta_2 : float, optional, default 0.999 + beta_2 : float, default=0.999 Exponential decay rate for estimates of second moment vector, should be in [0, 1) - epsilon : float, optional, default 1e-8 + epsilon : float, default=1e-8 Value for numerical stability Attributes
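A minimal sketch (not part of the patch) exercising the Ridge behavior documented above: the default ``solver='auto'`` and ``alpha=1.0``, plus the ``store_cv_values``/``cv_values_`` pair on RidgeCV, which requires ``cv=None`` (generalized cross-validation). The synthetic data and the alpha grid are illustrative choices::

    import numpy as np
    from sklearn.linear_model import Ridge, RidgeCV

    rng = np.random.RandomState(0)
    X = rng.randn(50, 3)
    y = X @ np.array([1.5, -2.0, 0.5]) + 0.1 * rng.randn(50)

    # solver='auto' picks a routine based on the data, per the docstring.
    reg = Ridge(alpha=1.0, solver='auto').fit(X, y)
    print(reg.coef_.shape)   # (n_features,) for a single target
    print(reg.intercept_)    # a float, since y is 1-D

    # With cv=None, RidgeCV uses generalized cross-validation, so
    # store_cv_values=True is allowed and fills cv_values_.
    reg_cv = RidgeCV(alphas=[0.1, 1.0, 10.0], store_cv_values=True).fit(X, y)
    print(reg_cv.alpha_, reg_cv.cv_values_.shape)  # best alpha, (50, 3)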
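A similar sketch for the MLPClassifier early-stopping knobs documented above (``early_stopping``, ``validation_fraction``, ``n_iter_no_change``); the dataset and layer width are illustrative::

    from sklearn.datasets import make_classification
    from sklearn.neural_network import MLPClassifier

    X, y = make_classification(n_samples=200, random_state=0)

    # With early_stopping=True, validation_fraction of the training data is
    # held out, and training stops once the validation score fails to improve
    # by tol for n_iter_no_change consecutive epochs ('sgd' or 'adam' only).
    clf = MLPClassifier(hidden_layer_sizes=(100,), solver='adam',
                        early_stopping=True, validation_fraction=0.1,
                        n_iter_no_change=10, max_iter=200,
                        random_state=0).fit(X, y)
    print(clf.n_iter_)                     # epochs actually run
    print(clf.predict_proba(X[:3]).shape)  # (3, n_classes)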
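A sketch for BernoulliRBM, whose documented defaults appear above (``n_components=256``, ``learning_rate=0.1``, ``batch_size=10``, ``n_iter=10``); a smaller ``n_components`` is used here to keep the run cheap, and the binary data is synthetic::

    import numpy as np
    from sklearn.neural_network import BernoulliRBM

    rng = np.random.RandomState(0)
    X = (rng.rand(100, 20) > 0.5).astype(float)   # binary visible units

    rbm = BernoulliRBM(n_components=16, learning_rate=0.1, batch_size=10,
                       n_iter=10, random_state=0).fit(X)
    H = rbm.transform(X)   # ndarray of shape (n_samples, n_components)
    V = rbm.gibbs(X)       # one Gibbs step, (n_samples, n_features)
    print(H.shape, V.shape, rbm.score_samples(X).shape)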
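The AdamOptimizer entry documents ``learning_rate_init``, ``beta_1``, ``beta_2`` and ``epsilon``. Since that class lives in the private module ``sklearn.neural_network._stochastic_optimizers``, here is a self-contained NumPy sketch of the textbook Adam update those parameters drive (sklearn's internal step scheduling may differ in detail); the quadratic toy loss is a stand-in::

    import numpy as np

    def adam_step(param, grad, m, v, t, learning_rate_init=0.1,
                  beta_1=0.9, beta_2=0.999, epsilon=1e-8):
        # Update biased first and second moment estimates.
        m = beta_1 * m + (1 - beta_1) * grad
        v = beta_2 * v + (1 - beta_2) * grad ** 2
        # Bias-correct, then take a scaled step.
        m_hat = m / (1 - beta_1 ** t)
        v_hat = v / (1 - beta_2 ** t)
        param = param - learning_rate_init * m_hat / (np.sqrt(v_hat) + epsilon)
        return param, m, v

    w = np.zeros(3)
    m = np.zeros_like(w)
    v = np.zeros_like(w)
    target = np.array([1.0, -1.0, 0.5])
    for t in range(1, 201):
        grad = 2 * (w - target)      # gradient of ||w - target||^2
        w, m, v = adam_step(w, grad, m, v, t)
    print(np.round(w, 3))            # moves toward [1., -1., 0.5]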