FIX Covariance matrix in BayesianRidge #31094

Merged
@@ -0,0 +1,3 @@
+- :class:`linear_model.BayesianRidge` now uses the full SVD to correctly estimate
+  the posterior covariance matrix `sigma_` when `n_samples < n_features`.
+  By :user:`Antoine Baker <antoinebaker>`
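
A note on the underlying algebra (not part of the diff; inferred from the change and the test below): the Bayesian ridge posterior covariance is

$$
\Sigma = (\lambda I_N + \alpha X^\top X)^{-1}
       = V \,\mathrm{diag}\!\left(\frac{1}{\alpha s_i^2 + \lambda}\right) V^\top,
$$

where $X = U S V^\top$ is the full SVD ($V$ of shape $(N, N)$) and $s_i = 0$ for $i > K = \min(M, N)$. A thin SVD only provides the first $K$ rows of $V^\top$, so when $M < N$ it drops the $1/\lambda$ contribution from the null space of $X$, which is the error fixed here.
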
20 changes: 15 additions & 5 deletions sklearn/linear_model/_bayes.py
@@ -293,8 +293,19 @@ def fit(self, X, y, sample_weight=None):
coef_old_ = None

XT_y = np.dot(X.T, y)
-U, S, Vh = linalg.svd(X, full_matrices=False)
+# Let M, N = n_samples, n_features and K = min(M, N).
+# The posterior covariance matrix needs Vh_full: (N, N).
+# The full SVD is only required when n_samples < n_features.
+# When n_samples < n_features, K=M and full_matrices=True
+#     U: (M, M), S: M, Vh_full: (N, N), Vh: (M, N)
+# When n_samples > n_features, K=N and full_matrices=False
+#     U: (M, N), S: N, Vh_full: (N, N), Vh: (N, N)
+U, S, Vh_full = linalg.svd(X, full_matrices=(n_samples < n_features))
+K = len(S)
+eigen_vals_ = S**2
+eigen_vals_full = np.zeros(n_features, dtype=dtype)
+eigen_vals_full[0:K] = eigen_vals_
+Vh = Vh_full[0:K, :]

# Convergence loop of the bayesian ridge regression
for iter_ in range(self.max_iter):
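
A minimal sketch (not from the PR) double-checking the shape claims in the comments above with plain NumPy/SciPy:

```python
import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
M, N = 5, 8  # n_samples < n_features, so K = min(M, N) = M
X = rng.standard_normal((M, N))

U, S, Vh_full = linalg.svd(X, full_matrices=(M < N))
K = len(S)
assert K == min(M, N)
assert U.shape == (M, M)        # full U, since full_matrices=True here
assert Vh_full.shape == (N, N)  # full Vh: what the covariance needs

eigen_vals_full = np.zeros(N)
eigen_vals_full[:K] = S**2      # remaining N - K eigenvalues of X.T @ X are zero
Vh = Vh_full[:K, :]             # thin part, shape (M, N)
assert Vh.shape == (M, N)
```
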
@@ -353,11 +364,10 @@ def fit(self, X, y, sample_weight=None):
self.scores_.append(s)
self.scores_ = np.array(self.scores_)

-# posterior covariance is given by 1/alpha_ * scaled_sigma_
-scaled_sigma_ = np.dot(
-    Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]
+# posterior covariance
+self.sigma_ = np.dot(
+    Vh_full.T, Vh_full / (alpha_ * eigen_vals_full + lambda_)[:, np.newaxis]
)
-self.sigma_ = (1.0 / alpha_) * scaled_sigma_

self._set_intercept(X_offset_, y_offset_, X_scale_)
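
And a minimal sketch (again not from the PR) that the rewritten `sigma_` expression reproduces the direct inverse $(\lambda I + \alpha X^\top X)^{-1}$, including the null-space part that the old thin-`Vh` expression dropped when `n_samples < n_features`:

```python
import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
M, N = 5, 8                 # also works for M > N
X = rng.standard_normal((M, N))
alpha_, lambda_ = 2.0, 0.5  # arbitrary positive precisions for the sketch

U, S, Vh_full = linalg.svd(X, full_matrices=(M < N))
eigen_vals_full = np.zeros(N)
eigen_vals_full[: len(S)] = S**2

# The rewritten expression from the diff above.
sigma_ = np.dot(
    Vh_full.T, Vh_full / (alpha_ * eigen_vals_full + lambda_)[:, np.newaxis]
)

# Direct inverse it should reproduce.
expected = np.linalg.inv(lambda_ * np.eye(N) + alpha_ * X.T @ X)
np.testing.assert_allclose(sigma_, expected, rtol=1e-8, atol=1e-12)
```
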

17 changes: 17 additions & 0 deletions sklearn/linear_model/tests/test_bayes.py
@@ -11,6 +11,7 @@
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
_convert_container,
+assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
assert_array_less,
@@ -94,6 +95,22 @@ def test_bayesian_ridge_parameter():
assert_almost_equal(rr_model.intercept_, br_model.intercept_)


+@pytest.mark.parametrize("n_samples, n_features", [(10, 20), (20, 10)])
+def test_bayesian_covariance_matrix(n_samples, n_features, global_random_seed):
+    """Check the posterior covariance matrix sigma_
+
+    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/31093
+    """
+    X, y = datasets.make_regression(
+        n_samples, n_features, random_state=global_random_seed
+    )
+    reg = BayesianRidge(fit_intercept=False).fit(X, y)
+    covariance_matrix = np.linalg.inv(
+        reg.lambda_ * np.identity(n_features) + reg.alpha_ * np.dot(X.T, X)
+    )
+    assert_allclose(reg.sigma_, covariance_matrix, rtol=1e-6)


def test_bayesian_sample_weights():
# Test correctness of the sample_weights method
X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
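
Beyond the regression test, a usage sketch (assumptions: a build that includes this fix, and that `predict(..., return_std=True)` uses the usual `x @ sigma_ @ x.T + 1 / alpha_` predictive variance):

```python
import numpy as np
from sklearn import datasets
from sklearn.linear_model import BayesianRidge

X, y = datasets.make_regression(n_samples=10, n_features=20, random_state=0)
reg = BayesianRidge(fit_intercept=False).fit(X, y)

# With the fix, sigma_ matches the closed-form posterior covariance
# even though n_samples < n_features.
closed_form = np.linalg.inv(
    reg.lambda_ * np.eye(X.shape[1]) + reg.alpha_ * X.T @ X
)
np.testing.assert_allclose(reg.sigma_, closed_form, rtol=1e-6)

# Predictive uncertainty at new points relies on sigma_ as well
# (assumed formula: x @ sigma_ @ x.T + 1 / alpha_).
X_new = np.random.default_rng(1).standard_normal((3, 20))
y_mean, y_std = reg.predict(X_new, return_std=True)
manual_var = np.einsum("ij,jk,ik->i", X_new, reg.sigma_, X_new) + 1.0 / reg.alpha_
np.testing.assert_allclose(y_std, np.sqrt(manual_var), rtol=1e-6)
```
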