diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 3576ae3bdeb60..6b8dd8024ce1a 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -797,11 +797,21 @@ def __init__(self, alphas=(0.1, 1.0, 10.0),
         self.gcv_mode = gcv_mode
         self.store_cv_values = store_cv_values
 
-    def _pre_compute(self, X, y):
+    def _pre_compute(self, X, y, sample_weight=None):
         # even if X is very sparse, K is usually very dense
         K = safe_sparse_dot(X, X.T, dense_output=True)
+        if sample_weight is not None:
+            has_sw = True
+            sqrt_sw = np.sqrt(sample_weight).reshape(K.shape[0], 1)
+            K *= sqrt_sw
+            K *= sqrt_sw.T
+        else:
+            has_sw = False
         v, Q = linalg.eigh(K)
-        QT_y = np.dot(Q.T, y)
+        if has_sw:
+            QT_y = np.dot(Q.T, sqrt_sw * y.reshape(y.shape[0], -1))
+        else:
+            QT_y = np.dot(Q.T, y)
         return v, Q, QT_y
 
     def _decomp_diag(self, v_prime, Q):
@@ -815,15 +825,28 @@ def _diag_dot(self, D, B):
         D = D[(slice(None), ) + (np.newaxis, ) * (len(B.shape) - 1)]
         return D * B
 
-    def _errors(self, alpha, y, v, Q, QT_y):
+    def _errors(self, alpha, y, v, Q, QT_y, sample_weight=None):
         # don't construct matrix G, instead compute action on y & diagonal
         w = 1.0 / (v + alpha)
         c = np.dot(Q, self._diag_dot(w, QT_y))
+        c.shape = (c.shape[0], -1)
+        if sample_weight is not None:
+            has_sw = True
+            sqrt_sw = np.sqrt(sample_weight).reshape(y.shape[0], 1)
+            c *= sqrt_sw
+        else:
+            has_sw = False
         G_diag = self._decomp_diag(w, Q)
-        # handle case where y is 2-d
-        if len(y.shape) != 1:
-            G_diag = G_diag[:, np.newaxis]
-        return (c / G_diag) ** 2, c
+        G_diag = G_diag[:, np.newaxis]
+
+        errors = c / G_diag
+        if has_sw:
+            errors /= sqrt_sw ** 2
+
+        if y.ndim == 1:
+            errors.shape = (-1,)
+            c.shape = (-1,)
+        return errors ** 2, c
 
     def _values(self, alpha, y, v, Q, QT_y):
         # don't construct matrix G, instead compute action on y & diagonal
@@ -835,7 +858,7 @@ def _values(self, alpha, y, v, Q, QT_y):
             G_diag = G_diag[:, np.newaxis]
         return y - (c / G_diag), c
 
-    def _pre_compute_svd(self, X, y):
+    def _pre_compute_svd(self, X, y, sample_weight=None):
         if sparse.issparse(X):
             raise TypeError("SVD not supported for sparse matrices")
         U, s, _ = linalg.svd(X, full_matrices=0)
@@ -843,7 +866,7 @@ def _pre_compute_svd(self, X, y):
         UT_y = np.dot(U.T, y)
         return v, U, UT_y
 
-    def _errors_svd(self, alpha, y, v, U, UT_y):
+    def _errors_svd(self, alpha, y, v, U, UT_y, sample_weight=None):
         w = ((v + alpha) ** -1) - (alpha ** -1)
         c = np.dot(U, self._diag_dot(w, UT_y)) + (alpha ** -1) * y
         G_diag = self._decomp_diag(w, U) + (alpha ** -1)
@@ -889,7 +912,7 @@ def fit(self, X, y, sample_weight=None):
                                  sample_weight=sample_weight)
 
         gcv_mode = self.gcv_mode
-        with_sw = len(np.shape(sample_weight))
+        with_sw = sample_weight is not None
 
         if gcv_mode is None or gcv_mode == 'auto':
             if sparse.issparse(X) or n_features > n_samples or with_sw:
@@ -914,7 +937,7 @@ def fit(self, X, y, sample_weight=None):
         else:
             raise ValueError('bad gcv_mode "%s"' % gcv_mode)
 
-        v, Q, QT_y = _pre_compute(X, y)
+        v, Q, QT_y = _pre_compute(X, y, sample_weight)
         n_y = 1 if len(y.shape) == 1 else y.shape[1]
         cv_values = np.zeros((n_samples * n_y, len(self.alphas)))
         C = []
@@ -923,13 +946,10 @@ def fit(self, X, y, sample_weight=None):
         error = scorer is None
 
         for i, alpha in enumerate(self.alphas):
-            weighted_alpha = (sample_weight * alpha
-                              if sample_weight is not None
-                              else alpha)
             if error:
-                out, c = _errors(weighted_alpha, y, v, Q, QT_y)
+                out, c = _errors(alpha, y, v, Q, QT_y, sample_weight)
             else:
-                out, c = _values(weighted_alpha, y, v, Q, QT_y)
+                out, c = _values(alpha, y, v, Q, QT_y)
             cv_values[:, i] = out.ravel()
             C.append(c)
 
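Note on the rescaling in `_pre_compute`: with `fit_intercept=False`, ridge with sample weights is exactly ordinary ridge on rows scaled by `sqrt(sample_weight)`, which is why the patch forms `D K D` and `D y` with `D = diag(sqrt_sw)`. A minimal standalone check of that equivalence (illustrative only, not part of the patch):

    import numpy as np
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    X, y = rng.randn(30, 6), rng.randn(30)
    sw = rng.rand(30) + 0.5          # positive sample weights
    sqrt_sw = np.sqrt(sw)

    # weighted fit vs. plain fit on sqrt(w)-rescaled rows: same objective,
    # so the coefficients must agree
    r1 = Ridge(alpha=1.0, fit_intercept=False).fit(X, y, sample_weight=sw)
    r2 = Ridge(alpha=1.0, fit_intercept=False).fit(X * sqrt_sw[:, None],
                                                   y * sqrt_sw)
    assert np.allclose(r1.coef_, r2.coef_)
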
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 6f0b00b1dc54f..fb951e63a1bbf 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -31,7 +31,8 @@
 
 from sklearn.grid_search import GridSearchCV
 
-from sklearn.cross_validation import KFold
+from sklearn.cross_validation import KFold, LeaveOneOut
+from sklearn.utils import check_random_state
 
 
 diabetes = datasets.load_diabetes()
@@ -715,3 +716,57 @@ def test_ridge_fit_intercept_sparse():
     assert_warns(UserWarning, sparse.fit, X_csr, y)
     assert_almost_equal(dense.intercept_, sparse.intercept_)
     assert_array_almost_equal(dense.coef_, sparse.coef_)
+
+
+def make_noisy_forward_data(
+        n_samples=100,
+        n_features=200,
+        n_targets=10,
+        train_frac=.8,
+        noise_levels=None,
+        random_state=42):
+    """Create a simple, dense, noisy forward linear model with multiple
+    outputs."""
+    rng = check_random_state(random_state)
+    n_train = int(train_frac * n_samples)
+    train = slice(None, n_train)
+    test = slice(n_train, None)
+    X = rng.randn(n_samples, n_features)
+    W = rng.randn(n_features, n_targets)
+    Y_clean = X.dot(W)
+    if noise_levels is None:
+        noise_levels = rng.randn(n_targets) ** 2
+    noise_levels = np.atleast_1d(noise_levels) * np.ones(n_targets)
+    noise = rng.randn(*Y_clean.shape) * noise_levels * Y_clean.std(0)
+    Y = Y_clean + noise
+    return X, Y, W, train, test
+
+
+def test_ridge_gcv_with_sample_weights():
+
+    n_samples, n_features, n_targets = 20, 5, 7
+    X, Y = datasets.make_regression(n_samples, n_features,
+                                    n_targets=n_targets)
+    alphas = np.logspace(-3, 3, 9)
+
+    rng = np.random.RandomState(42)
+    sample_weights = rng.randn(n_samples) ** 2
+    cv = LeaveOneOut(n_samples)
+    cv_predictions = np.array([[
+        Ridge(solver='cholesky', alpha=alpha, fit_intercept=False).fit(
+            X[train], Y[train], sample_weight=sample_weights[train]
+        ).predict(X[test])
+        for train, test in cv] for alpha in alphas]).squeeze()
+
+    cv_errors = Y[np.newaxis] - cv_predictions.reshape(
+        len(alphas), n_samples, n_targets)
+
+    ridge_gcv = _RidgeGCV(alphas=alphas, store_cv_values=True,
+                          gcv_mode='eigen', fit_intercept=False)
+    # _RidgeGCV should reproduce these squared LOO errors in a single fit
+    ridge_gcv.fit(X, Y, sample_weight=sample_weights)
+    loo_squared_errors = ridge_gcv.cv_values_
+
+    assert_array_almost_equal(cv_errors ** 2,
+                              loo_squared_errors.transpose(2, 0, 1))
+
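For reference, the eigen-path arithmetic in `_errors` can also be checked outside the class. The sketch below is mine, not the patch's (`K_w`, `e_loo` and the brute-force loop are illustrative names): starting from the weighted kernel `D K D`, the leave-one-out residuals on the original scale of `y` come out of a single eigendecomposition via `(c / G_diag) / sqrt_sw`, matching one weighted refit per left-out sample.

    import numpy as np
    from scipy import linalg

    rng = np.random.RandomState(0)
    n, p, alpha = 12, 4, 1.0
    X = rng.randn(n, p)
    y = rng.randn(n)
    sw = rng.rand(n) + 0.5                  # positive sample weights
    sqrt_sw = np.sqrt(sw)

    # weighted kernel D K D and rescaled targets, as in _pre_compute
    K_w = X.dot(X.T) * np.outer(sqrt_sw, sqrt_sw)
    v, Q = linalg.eigh(K_w)
    QT_y = Q.T.dot(sqrt_sw * y)

    # LOO residuals from the decomposition, as in _errors
    w = 1.0 / (v + alpha)
    c = Q.dot(w * QT_y)                     # (K_w + alpha*I)^-1 @ (sqrt_sw * y)
    G_diag = (Q ** 2).dot(w)                # diag of (K_w + alpha*I)^-1
    e_loo = (c / G_diag) / sqrt_sw          # back to the original y scale

    # brute force: refit without sample i, keeping weights on the rest
    for i in range(n):
        mask = np.arange(n) != i
        Xw = X[mask] * sqrt_sw[mask, None]
        yw = y[mask] * sqrt_sw[mask]
        beta = linalg.solve(Xw.T.dot(Xw) + alpha * np.eye(p), Xw.T.dot(yw))
        assert np.allclose(y[i] - X[i].dot(beta), e_loo[i])

The new test exercises the same identity through the public API (`_RidgeGCV.fit` with `sample_weight` against per-split weighted `Ridge` fits), so this is just the underlying algebra in isolation.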