
Commit b90e09d

antoinebaker authored
FIX Covariance matrix in BayesianRidge (#31094)
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
Co-authored-by: Jérémie du Boisberranger <jeremie@probabl.ai>
1 parent 28ec3cf commit b90e09d

File tree

3 files changed: +35 −5 lines
@@ -0,0 +1,3 @@
+- :class:`linear_model.BayesianRidge` now uses the full SVD to correctly estimate
+  the posterior covariance matrix `sigma_` when `n_samples < n_features`.
+  By :user:`Antoine Baker <antoinebaker>`
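
For context, the posterior covariance of the Bayesian ridge weights is `sigma_ = inv(lambda_ * I + alpha_ * X.T @ X)`, an (n_features, n_features) matrix. The reduced SVD used previously only yields `Vh` of shape (n_samples, n_features), so when `n_samples < n_features` the product `Vh.T @ Vh` is a rank-deficient projector rather than a resolution of the full feature space, which corrupted `sigma_`. A minimal NumPy sketch of the root cause (the names `M`, `N` follow the comments in the patch; the random matrix is illustrative only):

import numpy as np

rng = np.random.default_rng(0)
M, N = 10, 20  # n_samples < n_features, the broken regime
X = rng.standard_normal((M, N))

# Reduced SVD: Vh has shape (M, N) = (10, 20).
U, S, Vh = np.linalg.svd(X, full_matrices=False)

# Vh.T @ Vh is a rank-M orthogonal projector, not the (N, N) identity,
# so any covariance assembled from it has rank at most M.
print(np.linalg.matrix_rank(Vh.T @ Vh))  # 10, not 20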

sklearn/linear_model/_bayes.py

+15 −5
@@ -293,8 +293,19 @@ def fit(self, X, y, sample_weight=None):
         coef_old_ = None

         XT_y = np.dot(X.T, y)
-        U, S, Vh = linalg.svd(X, full_matrices=False)
+        # Let M, N = n_samples, n_features and K = min(M, N).
+        # The posterior covariance matrix needs Vh_full: (N, N).
+        # The full SVD is only required when n_samples < n_features.
+        # When n_samples < n_features, K=M and full_matrices=True
+        # U: (M, M), S: M, Vh_full: (N, N), Vh: (M, N)
+        # When n_samples > n_features, K=N and full_matrices=False
+        # U: (M, N), S: N, Vh_full: (N, N), Vh: (N, N)
+        U, S, Vh_full = linalg.svd(X, full_matrices=(n_samples < n_features))
+        K = len(S)
         eigen_vals_ = S**2
+        eigen_vals_full = np.zeros(n_features, dtype=dtype)
+        eigen_vals_full[0:K] = eigen_vals_
+        Vh = Vh_full[0:K, :]

         # Convergence loop of the bayesian ridge regression
         for iter_ in range(self.max_iter):

@@ -353,11 +364,10 @@ def fit(self, X, y, sample_weight=None):
             self.scores_.append(s)
         self.scores_ = np.array(self.scores_)

-        # posterior covariance is given by 1/alpha_ * scaled_sigma_
-        scaled_sigma_ = np.dot(
-            Vh.T, Vh / (eigen_vals_ + lambda_ / alpha_)[:, np.newaxis]
+        # posterior covariance
+        self.sigma_ = np.dot(
+            Vh_full.T, Vh_full / (alpha_ * eigen_vals_full + lambda_)[:, np.newaxis]
         )
-        self.sigma_ = (1.0 / alpha_) * scaled_sigma_

         self._set_intercept(X_offset_, y_offset_, X_scale_)
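
As a sanity check on the identity the patch relies on: with the full SVD and the squared singular values zero-padded to length n_features, `alpha_ * X.T @ X + lambda_ * I` diagonalizes as `Vh_full.T @ diag(alpha_ * S**2 + lambda_) @ Vh_full`, so its inverse is exactly the expression assigned to `self.sigma_`. A standalone sketch, not scikit-learn code (arbitrary positive `alpha_`/`lambda_` and a random `X` for illustration):

import numpy as np

rng = np.random.default_rng(42)
M, N = 10, 20                 # n_samples < n_features
alpha_, lambda_ = 2.0, 0.5    # arbitrary positive precisions
X = rng.standard_normal((M, N))

# Full SVD as in the patch: Vh_full is (N, N), S holds K = min(M, N) values.
U, S, Vh_full = np.linalg.svd(X, full_matrices=(M < N))
eigen_vals_full = np.zeros(N)
eigen_vals_full[: len(S)] = S**2

# sigma from the patched expression ...
sigma = Vh_full.T @ (Vh_full / (alpha_ * eigen_vals_full + lambda_)[:, np.newaxis])
# ... matches the direct inverse of the posterior precision matrix.
direct = np.linalg.inv(alpha_ * X.T @ X + lambda_ * np.eye(N))
assert np.allclose(sigma, direct)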

sklearn/linear_model/tests/test_bayes.py

+17
@@ -11,6 +11,7 @@
 from sklearn.utils import check_random_state
 from sklearn.utils._testing import (
     _convert_container,
+    assert_allclose,
     assert_almost_equal,
     assert_array_almost_equal,
     assert_array_less,

@@ -94,6 +95,22 @@ def test_bayesian_ridge_parameter():
     assert_almost_equal(rr_model.intercept_, br_model.intercept_)


+@pytest.mark.parametrize("n_samples, n_features", [(10, 20), (20, 10)])
+def test_bayesian_covariance_matrix(n_samples, n_features, global_random_seed):
+    """Check the posterior covariance matrix sigma_
+
+    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/31093
+    """
+    X, y = datasets.make_regression(
+        n_samples, n_features, random_state=global_random_seed
+    )
+    reg = BayesianRidge(fit_intercept=False).fit(X, y)
+    covariance_matrix = np.linalg.inv(
+        reg.lambda_ * np.identity(n_features) + reg.alpha_ * np.dot(X.T, X)
+    )
+    assert_allclose(reg.sigma_, covariance_matrix, rtol=1e-6)
+
+
 def test_bayesian_sample_weights():
     # Test correctness of the sample_weights method
     X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
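
The test pins `sigma_` to the closed-form posterior covariance `inv(lambda_ * I + alpha_ * X.T @ X)` on both the wide (10, 20) and tall (20, 10) shapes. One downstream effect worth noting: `predict(X, return_std=True)` derives its predictive standard deviations from `sigma_`, so the rank-deficient covariance also skewed uncertainty estimates in the `n_samples < n_features` regime. A small usage sketch (illustrative data and parameters only):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import BayesianRidge

X, y = make_regression(n_samples=10, n_features=20, random_state=0)
reg = BayesianRidge(fit_intercept=False).fit(X, y)

# Predictive standard deviations are computed from sigma_, so they
# benefit directly from the corrected covariance.
y_mean, y_std = reg.predict(X, return_std=True)
print(y_std.shape)  # (10,)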
