scikit-learn · antoinebaker · Oct 8, 2024 · Oct 9, 2024 · Oct 10, 2024 · Oct 11, 2024
diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
@@ -261,6 +261,10 @@ Changelog
 :mod:`sklearn.linear_model`
 ...........................
 
+- |Fix| :class:`linear_model.LinearRegression` now uses `numpy.linalg.lstsq` instead
+  of `scipy.linalg.lstsq` and sets `rcond`, the cut-off for small singular values.
+  :pr:`30030` by :user:`Antoine Baker <antoinebaker>`.
+
 - |Fix| :class:`linear_model.LogisticRegressionCV` corrects sample weight handling
   for the calculation of test scores.
   :pr:`29419` by :user:`Shruti Nath <snath-xoc>`.

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
@@ -12,7 +12,7 @@
 
 import numpy as np
 import scipy.sparse as sp
-from scipy import linalg, optimize, sparse
+from scipy import optimize, sparse
 from scipy.sparse.linalg import lsqr
 from scipy.special import expit
 
@@ -525,7 +525,7 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
     Notes
     -----
     From the implementation point of view, this is just plain Ordinary
-    Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares
+    Least Squares (numpy.linalg.lstsq) or Non Negative Least Squares
     (scipy.optimize.nnls) wrapped as a predictor object.
 
     Examples
@@ -673,7 +673,11 @@ def rmatvec(b):
                 )
                 self.coef_ = np.vstack([out[0] for out in outs])
         else:
-            self.coef_, _, self.rank_, self.singular_ = linalg.lstsq(X, y)
+            # cut-off ratio for small singular values (numpy 2.0 default value)
+            rcond = max(X.shape) * np.finfo(X.dtype).eps
+            self.coef_, _, self.rank_, self.singular_ = np.linalg.lstsq(
+                X, y, rcond=rcond
+            )
             self.coef_ = self.coef_.T
 
         if y.ndim == 1:

diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py
@@ -754,17 +754,9 @@ def test_linear_regression_sample_weight_consistency(
     if fit_intercept:
         intercept_0 = reg.intercept_
     reg.fit(X[:-5], y[:-5], sample_weight=sample_weight[:-5])
-    if fit_intercept and sparse_container is None:
-        # FIXME: https://github.com/scikit-learn/scikit-learn/issues/26164
-        # This often fails, e.g. when calling
-        # SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all" pytest \
-        # sklearn/linear_model/tests/test_base.py\
-        # ::test_linear_regression_sample_weight_consistency
-        pass
-    else:
-        assert_allclose(reg.coef_, coef_0, rtol=1e-5)
-        if fit_intercept:
-            assert_allclose(reg.intercept_, intercept_0)
+    assert_allclose(reg.coef_, coef_0, rtol=1e-5)
+    if fit_intercept:
+        assert_allclose(reg.intercept_, intercept_0)
 
     # 5) check that multiplying sample_weight by 2 is equivalent to repeating
     # corresponding samples twice