diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index ef53aa6037c86..d9b2fa387be85 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -34,7 +34,7 @@
 from ..utils.fixes import sparse_lsqr
 from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
 from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
-from ..utils.validation import check_is_fitted
+from ..utils.validation import check_is_fitted, _check_sample_weight
 from ..preprocessing import normalize as f_normalize

 # TODO: bayesian_ridge_regression and bayesian_regression_ard
@@ -117,7 +117,6 @@ def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
     This is here because nearly all linear models will want their data to be
     centered. This function also systematically makes y consistent with X.dtype
     """
-
     if isinstance(sample_weight, numbers.Number):
         sample_weight = None
     if sample_weight is not None:
@@ -183,7 +182,7 @@
 def _rescale_data(X, y, sample_weight):
     """Rescale data so as to support sample_weight"""
     n_samples = X.shape[0]
-    sample_weight = np.array(sample_weight)
+    sample_weight = np.asarray(sample_weight)
     if sample_weight.ndim == 0:
         sample_weight = np.full(n_samples, sample_weight,
                                 dtype=sample_weight.dtype)
@@ -404,7 +403,7 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):

     Attributes
     ----------
-    coef_ : array, shape (n_features, ) or (n_targets, n_features)
+    coef_ : array of shape (n_features,) or (n_targets, n_features)
         Estimated coefficients for the linear regression problem.
         If multiple targets are passed during the fit (y 2D), this
         is a 2D array of shape (n_targets, n_features), while if only
@@ -413,10 +412,10 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
     rank_ : int
         Rank of matrix `X`. Only available when `X` is dense.

-    singular_ : array, shape (min(X, y),)
+    singular_ : array of shape (min(X, y),)
         Singular values of `X`. Only available when `X` is dense.

-    intercept_ : float | array, shape = (n_targets,)
+    intercept_ : float or array of shape (n_targets,)
         Independent term in the linear model. Set to 0.0 if
         `fit_intercept = False`.

@@ -457,13 +456,13 @@ def fit(self, X, y, sample_weight=None):

         Parameters
         ----------
-        X : array-like or sparse matrix, shape (n_samples, n_features)
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Training data

-        y : array_like, shape (n_samples, n_targets)
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
             Target values. Will be cast to X's dtype if necessary

-        sample_weight : numpy array of shape [n_samples]
+        sample_weight : array-like of shape (n_samples,), default=None
             Individual weights for each sample

             .. versionadded:: 0.17
@@ -478,8 +477,9 @@ def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                          y_numeric=True, multi_output=True)

-        if sample_weight is not None and np.asarray(sample_weight).ndim > 1:
-            raise ValueError("Sample weights must be 1D array or scalar")
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)

         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
             X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
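Every `fit` touched by this patch funnels `sample_weight` through `_check_sample_weight`. For reference, a minimal sketch of that helper's contract (a simplified rendering, not the actual code in `sklearn/utils/validation.py`; the name `check_sample_weight_sketch` is ours):

```python
import numbers
import numpy as np

def check_sample_weight_sketch(sample_weight, X, dtype=None):
    """Sketch of the contract of _check_sample_weight."""
    n_samples = X.shape[0]
    if dtype not in (np.float32, np.float64):
        dtype = np.float64
    if sample_weight is None:
        # "no weights" is represented as unit weights
        return np.ones(n_samples, dtype=dtype)
    if isinstance(sample_weight, numbers.Number):
        # a scalar means the same weight for every sample
        return np.full(n_samples, sample_weight, dtype=dtype)
    sample_weight = np.asarray(sample_weight, dtype=dtype)
    if sample_weight.ndim != 1 or sample_weight.shape[0] != n_samples:
        raise ValueError("sample_weight must be 1D and of length n_samples")
    return sample_weight
```

Centralizing shape, dtype and scalar handling in one helper is what allows the ad-hoc `ndim > 1` checks (and their "Sample weights must be 1D array or scalar" errors) to be deleted below.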
diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py
index c97f52ac6778e..333ae5494b4e9 100644
--- a/sklearn/linear_model/_bayes.py
+++ b/sklearn/linear_model/_bayes.py
@@ -14,6 +14,7 @@
 from ..utils.extmath import fast_logdet
 from ..utils import check_X_y
 from ..utils.fixes import pinvh
+from ..utils.validation import _check_sample_weight


 ###############################################################################
@@ -169,7 +170,7 @@ def fit(self, X, y, sample_weight=None):

         Parameters
         ----------
-        X : ndarray of shape (n_samples,n_features)
+        X : ndarray of shape (n_samples, n_features)
             Training data
         y : ndarray of shape (n_samples,)
             Target values. Will be cast to X's dtype if necessary
@@ -190,6 +191,11 @@ def fit(self, X, y, sample_weight=None):
                              ' Got {!r}.'.format(self.n_iter))

         X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
+
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)
+
         X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
             X, y, self.fit_intercept, self.normalize, self.copy_X,
             sample_weight=sample_weight)
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 5fe50b5a21acb..1350878b54154 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -320,7 +320,8 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("%s does not support sample_weight. Samples"
                              " weights are only used for the calibration"
                              " itself." % estimator_name)
-        sample_weight = _check_sample_weight(sample_weight, X)
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X)

         n_inliers_best = 1
         score_best = -np.inf
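The net effect in `_bayes.py` is that `BayesianRidge.fit` validates weights the same way as the other estimators, while `_ransac.py` merely stops converting `sample_weight=None` into an array it never uses. An illustrative check of the scalar form (our example, not from the PR's test suite):

```python
import numpy as np
from sklearn.linear_model import BayesianRidge, LinearRegression

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X @ np.array([1.0, 2.0, -0.5]) + 0.1 * rng.randn(20)

# A scalar weight is expanded to a constant weight vector, so these two
# calls must produce identical coefficients.
for est in (LinearRegression(), BayesianRidge()):
    coef_scalar = est.fit(X, y, sample_weight=2.0).coef_.copy()
    coef_array = est.fit(X, y, sample_weight=np.full(20, 2.0)).coef_
    assert np.allclose(coef_scalar, coef_array)
```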
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index d217e0d832d2b..9e1dd7f22085d 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -245,15 +245,14 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',

     Parameters
     ----------
-    X : {array-like, sparse matrix, LinearOperator},
-        shape = [n_samples, n_features]
+    X : {array-like, sparse matrix, LinearOperator} of shape \
+        (n_samples, n_features)
         Training data

     y : array-like of shape (n_samples,) or (n_samples, n_targets)
         Target values

-    alpha : {float, array-like},
-        shape = [n_targets] if array-like
+    alpha : float or array-like of shape (n_targets,)
         Regularization strength; must be a positive float. Regularization
         improves the conditioning of the problem and reduces the variance of
         the estimates. Larger values specify stronger regularization.
@@ -262,8 +261,9 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
         assumed to be specific to the targets. Hence they must correspond
         in number.

-    sample_weight : float or numpy array of shape (n_samples,), default=None
-        Individual weights for each sample. If sample_weight is not None and
+    sample_weight : float or array-like of shape (n_samples,), default=None
+        Individual weights for each sample. If given a float, every sample
+        will have the same weight. If sample_weight is not None and
         solver='auto', the solver will be set to 'cholesky'.

         .. versionadded:: 0.17
@@ -349,14 +349,14 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',

     Returns
     -------
-    coef : array, shape = [n_features] or [n_targets, n_features]
+    coef : array of shape (n_features,) or (n_targets, n_features)
         Weight vector(s).

     n_iter : int, optional
         The actual number of iteration performed by the solver.
         Only returned if `return_n_iter` is True.

-    intercept : float or array, shape = [n_targets]
+    intercept : float or array of shape (n_targets,)
         The intercept of the model. Only returned if `return_intercept`
         is True and if X is a scipy sparse array.
@@ -364,7 +364,6 @@ def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
     -----
     This function won't compute the intercept.
     """
-
     return _ridge_regression(X, y, alpha,
                              sample_weight=sample_weight,
                              solver=solver,
@@ -566,9 +565,9 @@ def fit(self, X, y, sample_weight=None):
         else:
             solver = self.solver

-        if ((sample_weight is not None) and
-                np.asarray(sample_weight).ndim > 1):
-            raise ValueError("Sample weights must be 1D array or scalar")
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)

         # when X is sparse we only remove offset from y
         X, y, X_offset, y_offset, X_scale = self._preprocess_data(
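The functional interface gets the same documented semantics: a float is a constant weight vector, and `solver='auto'` falls back to `'cholesky'` when weights are present. Illustrative usage (our example):

```python
import numpy as np
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X, y = rng.randn(10, 4), rng.randn(10)

# Scalar and explicit array forms go through the same rescaling path
# (_rescale_data above), so they agree.
coef_scalar = ridge_regression(X, y, alpha=1.0, sample_weight=1.0)
coef_array = ridge_regression(X, y, alpha=1.0, sample_weight=np.ones(10))
assert np.allclose(coef_scalar, coef_array)
```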
@@ -613,7 +612,7 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     the linear least squares function and regularization is given by
     the l2-norm. Also known as Ridge Regression or Tikhonov regularization.
     This estimator has built-in support for multi-variate regression
-    (i.e., when y is a 2d-array of shape [n_samples, n_targets]).
+    (i.e., when y is a 2d-array of shape (n_samples, n_targets)).

     Read more in the :ref:`User Guide <ridge_regression>`.

@@ -701,14 +700,14 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     Attributes
     ----------
-    coef_ : array, shape (n_features,) or (n_targets, n_features)
+    coef_ : array of shape (n_features,) or (n_targets, n_features)
         Weight vector(s).

-    intercept_ : float | array, shape = (n_targets,)
+    intercept_ : float or array of shape (n_targets,)
         Independent term in decision function. Set to 0.0 if
         ``fit_intercept = False``.

-    n_iter_ : array or None, shape (n_targets,)
+    n_iter_ : None or array of shape (n_targets,)
         Actual number of iterations for each target. Available only for
         sag and lsqr solvers. Other solvers will return None.

@@ -732,8 +731,8 @@ class Ridge(MultiOutputMixin, RegressorMixin, _BaseRidge):
     >>> clf = Ridge(alpha=1.0)
     >>> clf.fit(X, y)
     Ridge()
-
     """
+
     def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                  copy_X=True, max_iter=None, tol=1e-3, solver="auto",
                  random_state=None):
@@ -744,7 +743,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
                          random_state=random_state)

     def fit(self, X, y, sample_weight=None):
-        """Fit Ridge regression model
+        """Fit Ridge regression model.

         Parameters
         ----------
@@ -754,8 +753,9 @@ def fit(self, X, y, sample_weight=None):
         y : array-like of shape (n_samples,) or (n_samples, n_targets)
             Target values

-        sample_weight : float or numpy array of shape [n_samples]
-            Individual weights for each sample
+        sample_weight : float or array-like of shape (n_samples,), default=None
+            Individual weights for each sample. If given a float, every sample
+            will have the same weight.

         Returns
         -------
@@ -856,16 +856,16 @@ class RidgeClassifier(LinearClassifierMixin, _BaseRidge):
     Attributes
     ----------
-    coef_ : array, shape (1, n_features) or (n_classes, n_features)
+    coef_ : array of shape (1, n_features) or (n_classes, n_features)
         Coefficient of the features in the decision function.

         ``coef_`` is of shape (1, n_features) when the given problem is
         binary.

-    intercept_ : float | array, shape = (n_targets,)
+    intercept_ : float or array of shape (n_targets,)
         Independent term in decision function. Set to 0.0 if
         ``fit_intercept = False``.

-    n_iter_ : array or None, shape (n_targets,)
+    n_iter_ : None or array of shape (n_targets,)
         Actual number of iterations for each target. Available only for
         sag and lsqr solvers. Other solvers will return None.

@@ -903,7 +903,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, normalize=False,
         self.class_weight = class_weight

     def fit(self, X, y, sample_weight=None):
-        """Fit Ridge regression model.
+        """Fit Ridge classifier model.

         Parameters
         ----------
@@ -913,8 +913,9 @@ def fit(self, X, y, sample_weight=None):
         y : array-like of shape (n_samples,)
             Target values.

-        sample_weight : {float, array-like of shape (n_samples,)}, default=None
-            Sample weight.
+        sample_weight : float or array-like of shape (n_samples,), default=None
+            Individual weights for each sample. If given a float, every sample
+            will have the same weight.

         .. versionadded:: 0.17
            *sample_weight* support to Classifier.
@@ -926,7 +927,9 @@ def fit(self, X, y, sample_weight=None):
         """
         _accept_sparse = _get_valid_accept_sparse(sparse.issparse(X),
                                                   self.solver)
-        check_X_y(X, y, accept_sparse=_accept_sparse, multi_output=True)
+        X, y = check_X_y(X, y, accept_sparse=_accept_sparse, multi_output=True,
+                         y_numeric=False)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

         self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
         Y = self._label_binarizer.fit_transform(y)
@@ -939,8 +942,6 @@ def fit(self, X, y, sample_weight=None):
                 self.__class__.__name__))

         if self.class_weight:
-            if sample_weight is None:
-                sample_weight = 1.
             # modify the sample weights with the corresponding class weight
             sample_weight = (sample_weight *
                              compute_sample_weight(self.class_weight, y))
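The deleted `sample_weight = 1.` fallback is dead code after this change: `_check_sample_weight(None, X)` already returns a vector of ones, so the class-weight product is always an array operation. A sketch with hypothetical values:

```python
import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

y = np.array([0, 0, 1])
sample_weight = np.ones(y.shape[0])  # what _check_sample_weight(None, X) yields

# each sample's weight is scaled by the weight of its class
sample_weight = sample_weight * compute_sample_weight({0: 1, 1: 2}, y)
print(sample_weight)  # [1. 1. 2.]
```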
@@ -976,10 +977,10 @@ def _find_smallest_angle(query, vectors):

     Parameters
     ----------
-    query : ndarray, shape (n_samples,)
+    query : ndarray of shape (n_samples,)
         Normalized query vector.

-    vectors : ndarray, shape (n_samples, n_features)
+    vectors : ndarray of shape (n_samples, n_features)
         Vectors to which we compare query, as columns. Must be normalized.
     """
     abs_cosine = np.abs(query.dot(vectors))
@@ -1120,17 +1121,17 @@ def _compute_gram(self, X, sqrt_sw):
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape (n_samples, n_features)
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
             The preprocessed design matrix.

-        sqrt_sw : ndarray, shape (n_samples,)
+        sqrt_sw : ndarray of shape (n_samples,)
             square roots of sample weights

         Returns
         -------
-        gram : ndarray, shape (n_samples, n_samples)
+        gram : ndarray of shape (n_samples, n_samples)
             The Gram matrix.

-        X_mean : ndarray, shape (n_feature,)
+        X_mean : ndarray of shape (n_features,)
             The weighted mean of ``X`` for each feature.

         Notes
@@ -1170,17 +1171,17 @@ def _compute_covariance(self, X, sqrt_sw):
         Parameters
         ----------
-        X : sparse matrix, shape (n_samples, n_features)
+        X : sparse matrix of shape (n_samples, n_features)
             The preprocessed design matrix.

-        sqrt_sw : ndarray, shape (n_samples,)
+        sqrt_sw : ndarray of shape (n_samples,)
             square roots of sample weights

         Returns
         -------
-        covariance : ndarray, shape (n_features, n_features)
+        covariance : ndarray of shape (n_features, n_features)
             The covariance matrix.

-        X_mean : ndarray, shape (n_feature,)
+        X_mean : ndarray of shape (n_features,)
             The weighted mean of ``X`` for each feature.

         Notes
@@ -1219,16 +1220,16 @@ def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):
         ----------
         X : sparse matrix of shape (n_samples, n_features)

-        A : np.ndarray, shape = (n_features, n_features)
+        A : ndarray of shape (n_features, n_features)

-        X_mean : np.ndarray, shape = (n_features,)
+        X_mean : ndarray of shape (n_features,)

-        sqrt_sw : np.ndarray, shape = (n_features,)
+        sqrt_sw : ndarray of shape (n_samples,)
             square roots of sample weights

         Returns
         -------
-        diag : np.ndarray, shape = (n_samples,)
+        diag : ndarray of shape (n_samples,)
             The computed diagonal.
         """
         intercept_col = scale = sqrt_sw
@@ -1249,7 +1250,7 @@ def _sparse_multidot_diag(self, X, A, X_mean, sqrt_sw):
         return diag

     def _eigen_decompose_gram(self, X, y, sqrt_sw):
-        """Eigendecomposition of X.X^T, used when n_samples <= n_features"""
+        """Eigendecomposition of X.X^T, used when n_samples <= n_features."""
         # if X is dense it has already been centered in preprocessing
         K, X_mean = self._compute_gram(X, sqrt_sw)
         if self.fit_intercept:
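The `sqrt_sw` arguments documented above hold square roots of the sample weights because weighted least squares reduces to ordinary least squares after rescaling rows: ||sqrt(W)(y - Xw)||^2 = sum_i w_i (y_i - x_i . w)^2. A dense-only sketch of that rescaling (cf. `_rescale_data` in `_base.py`, which also handles sparse X):

```python
import numpy as np

def rescale_rows_sketch(X, y, sample_weight):
    # multiplying each row of X and each entry of y by sqrt(w_i) turns the
    # weighted least-squares problem into an unweighted one with the same
    # minimizer
    sqrt_sw = np.sqrt(np.asarray(sample_weight, dtype=float))
    return X * sqrt_sw[:, np.newaxis], y * sqrt_sw
```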
@@ -1263,7 +1264,7 @@ def _eigen_decompose_gram(self, X, y, sqrt_sw):
         return X_mean, eigvals, Q, QT_y

     def _solve_eigen_gram(self, alpha, y, sqrt_sw, X_mean, eigvals, Q, QT_y):
-        """Compute dual coefficients and diagonal of G^-1
+        """Compute dual coefficients and diagonal of G^-1.

         Used when we have a decomposition of X.X^T (n_samples <= n_features).
         """
@@ -1329,7 +1330,7 @@ def _solve_eigen_covariance_no_intercept(

     def _solve_eigen_covariance_intercept(
             self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):
-        """Compute dual coefficients and diagonal of G^-1
+        """Compute dual coefficients and diagonal of G^-1.

         Used when we have a decomposition of X^T.X
         (n_samples > n_features and X is sparse),
@@ -1359,7 +1360,7 @@ def _solve_eigen_covariance_intercept(

     def _solve_eigen_covariance(
             self, alpha, y, sqrt_sw, X_mean, eigvals, V, X):
-        """Compute dual coefficients and diagonal of G^-1
+        """Compute dual coefficients and diagonal of G^-1.

         Used when we have a decomposition of X^T.X
         (n_samples > n_features and X is sparse).
@@ -1386,7 +1387,7 @@ def _svd_decompose_design_matrix(self, X, y, sqrt_sw):

     def _solve_svd_design_matrix(
             self, alpha, y, sqrt_sw, X_mean, singvals_sq, U, UT_y):
-        """Compute dual coefficients and diagonal of G^-1
+        """Compute dual coefficients and diagonal of G^-1.

         Used when we have an SVD decomposition of X
         (n_samples > n_features and X is dense).
@@ -1406,34 +1407,36 @@ def _solve_svd_design_matrix(
         return G_inverse_diag, c

     def fit(self, X, y, sample_weight=None):
-        """Fit Ridge regression model
+        """Fit Ridge regression model with GCV.

         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            Training data. Will be cast to float64 if necessary
+            Training data. Will be cast to float64 if necessary.

         y : array-like of shape (n_samples,) or (n_samples, n_targets)
-            Target values. Will be cast to float64 if necessary
+            Target values. Will be cast to float64 if necessary.

-        sample_weight : float or array-like of shape [n_samples]
-            Sample weight
+        sample_weight : float or array-like of shape (n_samples,), default=None
+            Individual weights for each sample. If given a float, every sample
+            will have the same weight.

         Returns
         -------
         self : object
         """
-        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'],
-                         dtype=[np.float64],
+        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=[np.float64],
                          multi_output=True, y_numeric=True)

+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)
+
         if np.any(self.alphas <= 0):
             raise ValueError(
                 "alphas must be positive. Got {} containing some "
                 "negative or null value instead.".format(self.alphas))

-        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
-
         n_samples, n_features = X.shape

         X, y, X_offset, y_offset, X_scale = LinearModel._preprocess_data(
@@ -1525,7 +1528,7 @@ def __init__(self, alphas=(0.1, 1.0, 10.0),
         self.store_cv_values = store_cv_values

     def fit(self, X, y, sample_weight=None):
-        """Fit Ridge regression model
+        """Fit Ridge regression model with CV.

         Parameters
         ----------
@@ -1534,10 +1537,11 @@ def fit(self, X, y, sample_weight=None):
             if necessary.

         y : array-like of shape (n_samples,) or (n_samples, n_targets)
-            Target values. Will be cast to X's dtype if necessary
+            Target values. Will be cast to X's dtype if necessary.

-        sample_weight : float or array-like of shape [n_samples]
-            Sample weight
+        sample_weight : float or array-like of shape (n_samples,), default=None
+            Individual weights for each sample. If given a float, every sample
+            will have the same weight.

         Returns
         -------
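The `alphas` guard in the GCV path above rejects non-positive regularization strengths before any computation. Illustrative only (relies on `RidgeCV` delegating to the GCV estimator when `cv=None`, the default):

```python
import numpy as np
from sklearn.linear_model import RidgeCV

try:
    RidgeCV(alphas=(0.1, -1.0, 10.0)).fit(np.eye(3), np.arange(3.0))
except ValueError as exc:
    print(exc)  # alphas must be positive. Got (0.1, -1.0, 10.0) containing ...
```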
@@ -1595,7 +1599,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):

     Parameters
     ----------
-    alphas : numpy array of shape (n_alphas,), default=(0.1, 1.0, 10.0)
+    alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)
         Array of alpha values to try.
         Regularization strength; must be a positive float. Regularization
         improves the conditioning of the problem and reduces the variance of
@@ -1661,17 +1665,17 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
     Attributes
     ----------
-    cv_values_ : array, shape = [n_samples, n_alphas] or \
-        shape = [n_samples, n_targets, n_alphas], optional
+    cv_values_ : array of shape (n_samples, n_alphas) or \
+        shape (n_samples, n_targets, n_alphas), optional
         Cross-validation values for each alpha (if ``store_cv_values=True``\
         and ``cv=None``). After ``fit()`` has been called, this attribute \
         will contain the mean squared errors (by default) or the values \
         of the ``{loss,score}_func`` function (if provided in the constructor).

-    coef_ : array, shape = [n_features] or [n_targets, n_features]
+    coef_ : array of shape (n_features,) or (n_targets, n_features)
         Weight vector(s).

-    intercept_ : float | array, shape = (n_targets,)
+    intercept_ : float or array of shape (n_targets,)
         Independent term in decision function. Set to 0.0 if
         ``fit_intercept = False``.

@@ -1709,7 +1713,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):

     Parameters
     ----------
-    alphas : numpy array of shape (n_alphas,), default=(0.1, 1.0, 10.0)
+    alphas : ndarray of shape (n_alphas,), default=(0.1, 1.0, 10.0)
         Array of alpha values to try.
         Regularization strength; must be a positive float. Regularization
         improves the conditioning of the problem and reduces the variance of
@@ -1763,19 +1767,19 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
     Attributes
     ----------
-    cv_values_ : array, shape = [n_samples, n_targets, n_alphas], optional
+    cv_values_ : array of shape (n_samples, n_targets, n_alphas), optional
         Cross-validation values for each alpha (if ``store_cv_values=True``
         and ``cv=None``). After ``fit()`` has been called, this attribute
         will contain the mean squared errors (by default) or the values
         of the ``{loss,score}_func`` function (if provided in the constructor).
         This attribute exists only when ``store_cv_values`` is True.

-    coef_ : array, shape (1, n_features) or (n_targets, n_features)
+    coef_ : array of shape (1, n_features) or (n_targets, n_features)
         Coefficient of the features in the decision function.

         ``coef_`` is of shape (1, n_features) when the given problem is binary.

-    intercept_ : float | array, shape = (n_targets,)
+    intercept_ : float or array of shape (n_targets,)
         Independent term in decision function. Set to 0.0 if
         ``fit_intercept = False``.

@@ -1816,27 +1820,29 @@ def __init__(self, alphas=(0.1, 1.0, 10.0), fit_intercept=True,
         self.class_weight = class_weight

     def fit(self, X, y, sample_weight=None):
-        """Fit the ridge classifier.
+        """Fit Ridge classifier with CV.

         Parameters
         ----------
-        X : array-like, shape (n_samples, n_features)
+        X : array-like of shape (n_samples, n_features)
             Training vectors, where n_samples is the number of samples
             and n_features is the number of features. When using GCV,
             will be cast to float64 if necessary.

-        y : array-like, shape (n_samples,)
-            Target values. Will be cast to X's dtype if necessary
+        y : array-like of shape (n_samples,)
+            Target values. Will be cast to X's dtype if necessary.

-        sample_weight : {float, array-like of shape (n_samples,)}, default=None
-            Sample weight.
+        sample_weight : float or array-like of shape (n_samples,), default=None
+            Individual weights for each sample. If given a float, every sample
+            will have the same weight.

         Returns
         -------
         self : object
         """
-        check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
-                  multi_output=True)
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+                         multi_output=True, y_numeric=False)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

         self._label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
         Y = self._label_binarizer.fit_transform(y)
@@ -1844,8 +1850,6 @@ def fit(self, X, y, sample_weight=None):
             y = column_or_1d(y, warn=True)

         if self.class_weight:
-            if sample_weight is None:
-                sample_weight = 1.
             # modify the sample weights with the corresponding class weight
             sample_weight = (sample_weight *
                              compute_sample_weight(self.class_weight, y))
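End to end, the contract after this patch: `None`, a float, and an explicit array are interchangeable ways of expressing (possibly uniform) per-sample weights, including in combination with `class_weight`. An illustrative check (our example, not from the PR's test suite):

```python
import numpy as np
from sklearn.linear_model import RidgeClassifier, RidgeClassifierCV

rng = np.random.RandomState(0)
X = rng.randn(30, 4)
y = (X[:, 0] > 0).astype(int)

# None is equivalent to unit weights, with or without class_weight.
for est in (RidgeClassifier(class_weight='balanced'),
            RidgeClassifierCV(class_weight='balanced')):
    coef_none = est.fit(X, y).coef_.copy()
    coef_ones = est.fit(X, y, sample_weight=np.ones(len(y))).coef_
    assert np.allclose(coef_none, coef_ones)
```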