From 4b141b1b42f64a56158782fe1598c6875bf5256b Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 18 Nov 2019 16:37:27 +0100
Subject: [PATCH 1/8] ENH do not allocate local arrays in Ridge*CV if
 store_cv_values is False

---
 sklearn/linear_model/_ridge.py           | 63 ++++++++++++++----------
 sklearn/linear_model/tests/test_ridge.py | 13 +++++
 2 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 9e1dd7f22085d..1c0407066048c 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1054,6 +1054,16 @@ def _matmat(self, v):
         return res
 
 
+class _IdentityEstimator:
+    """Hack to call a scorer when we already have the predictions."""
+
+    def decision_function(self, y_predict):
+        return y_predict
+
+    def predict(self, y_predict):
+        return y_predict
+
+
 class _RidgeGCV(LinearModel):
     """Ridge regression with built-in Generalized Cross-Validation
 
@@ -1087,6 +1097,10 @@ class _RidgeGCV(LinearModel):
 
     looe = y - loov = c / diag(G^-1)
 
+    The best score (negative mean squared error or user-provided scoring) is
+    stored in the `best_score_` attribute, and the selected hyperparameter in
+    `alpha_`.
+
     References
     ----------
     http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf
@@ -1462,43 +1476,40 @@ def fit(self, X, y, sample_weight=None):
         else:
             sqrt_sw = np.ones(X.shape[0], dtype=X.dtype)
 
+        X_mean, *decomposition = decompose(X, y, sqrt_sw)
+
         scorer = check_scoring(self, scoring=self.scoring, allow_none=True)
         error = scorer is None
 
         n_y = 1 if len(y.shape) == 1 else y.shape[1]
-        cv_values = np.zeros((n_samples * n_y, len(self.alphas)),
-                             dtype=X.dtype)
-        C = []
-        X_mean, *decomposition = decompose(X, y, sqrt_sw)
+
+        if self.store_cv_values:
+            self.cv_values_ = np.empty(
+                (n_samples * n_y, len(self.alphas)), dtype=X.dtype)
+
+        best_coef, best_score, best_alpha = None, None, None
+
         for i, alpha in enumerate(self.alphas):
             G_inverse_diag, c = solve(
                 float(alpha), y, sqrt_sw, X_mean, *decomposition)
             if error:
                 squared_errors = (c / G_inverse_diag) ** 2
-                cv_values[:, i] = squared_errors.ravel()
+                alpha_score = -squared_errors.mean()
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = squared_errors.ravel()
             else:
                 predictions = y - (c / G_inverse_diag)
-                cv_values[:, i] = predictions.ravel()
-            C.append(c)
+                alpha_score = scorer(
+                    _IdentityEstimator(), predictions.ravel(), y.ravel())
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = predictions.ravel()
 
-        if error:
-            best = cv_values.mean(axis=0).argmin()
-        else:
-            # The scorer want an object that will make the predictions but
-            # they are already computed efficiently by _RidgeGCV. This
-            # identity_estimator will just return them
-            def identity_estimator():
-                pass
-            identity_estimator.decision_function = lambda y_predict: y_predict
-            identity_estimator.predict = lambda y_predict: y_predict
-
-            # signature of scorer is (estimator, X, y)
-            out = [scorer(identity_estimator, cv_values[:, i], y.ravel())
-                   for i in range(len(self.alphas))]
-            best = np.argmax(out)
-
-        self.alpha_ = self.alphas[best]
-        self.dual_coef_ = C[best]
+            if (best_score is None) or (alpha_score > best_score):
+                best_coef, best_score, best_alpha = c, alpha_score, alpha
+
+        self.alpha_ = best_alpha
+        self.best_score_ = best_score
+        self.dual_coef_ = best_coef
         self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)
 
         X_offset += X_mean * X_scale
@@ -1509,7 +1520,7 @@ def identity_estimator():
                 cv_values_shape = n_samples, len(self.alphas)
             else:
                 cv_values_shape = n_samples, n_y, len(self.alphas)
-            self.cv_values_ = cv_values.reshape(cv_values_shape)
+            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)
 
         return self
 
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index c786b154fcb85..17229a87fdafb 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -34,6 +34,7 @@
 from sklearn.linear_model._ridge import _check_gcv_mode
 from sklearn.linear_model._ridge import _X_CenterStackOp
 from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
 
 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold, GroupKFold, cross_val_predict
@@ -661,6 +662,18 @@ def _test_ridge_cv(filter_):
     assert type(ridge_cv.intercept_) == np.float64
 
 
+@pytest.mark.parametrize(
+    "ridge, make_dataset",
+    [(RidgeCV(store_cv_values=False), make_regression),
+     (RidgeClassifierCV(store_cv_values=False), make_classification)]
+)
+def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
+    # Check that `cv_values_` is not stored when store_cv_values is False
+    X, y = make_dataset(n_samples=6, random_state=42)
+    ridge.fit(X, y)
+    assert not hasattr(ridge, "cv_values_")
+
+
 def _test_ridge_diabetes(filter_):
     ridge = Ridge(fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)

From 74d81d65c9beeb2971002d33c3b93e7d83980c6f Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 18 Nov 2019 16:41:01 +0100
Subject: [PATCH 2/8] add whats new

---
 doc/whats_new/v0.22.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index 8f4dd76c08677..d82ae7fa5900f 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -532,6 +532,13 @@ Changelog
 - |Fix| The liblinear solver now supports ``sample_weight``.
   :pr:`15038` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- |Efficiency| :class:`linear_model.RidgeCV` and
+  :class:`linear_model.RidgeClassifierCV` now does not allocate a
+  potentially large array to store dual coefficients for all hyperparameters
+  during its `fit`, nor an array to store all LOO predictions unless
+  `store_cv_values` is `True`.
+  :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
+
 :mod:`sklearn.manifold`
 .......................
 

From 832df73d7d7e6541076fdcdbd6301f8859b533e0 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 18 Nov 2019 16:56:36 +0100
Subject: [PATCH 3/8] FIX add best_score_ to Ridge*CV estimators

---
 doc/whats_new/v0.22.rst                  |  4 ++++
 sklearn/linear_model/_ridge.py           |  2 ++
 sklearn/linear_model/tests/test_ridge.py | 12 ++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index d82ae7fa5900f..6fa24fc14a11d 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -539,6 +539,10 @@ Changelog
   `store_cv_values` is `True`.
   :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
 
+- |Fix| add `best_score_` attribute to :class:`linear_model.RidgeCV` and
+  :class:`linear_model.RidgeClassifierCV`.
+  :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`.
+
 :mod:`sklearn.manifold`
 .......................
 
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 1c0407066048c..2d2e1dd27b5e5 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1576,6 +1576,7 @@ def fit(self, X, y, sample_weight=None):
                                   store_cv_values=self.store_cv_values)
             estimator.fit(X, y, sample_weight=sample_weight)
             self.alpha_ = estimator.alpha_
+            self.best_score_ = estimator.best_score_
             if self.store_cv_values:
                 self.cv_values_ = estimator.cv_values_
         else:
@@ -1591,6 +1592,7 @@ def fit(self, X, y, sample_weight=None):
             gs.fit(X, y, sample_weight=sample_weight)
             estimator = gs.best_estimator_
             self.alpha_ = gs.best_estimator_.alpha
+            self.best_score_ = gs.best_score_
 
         self.coef_ = estimator.coef_
         self.intercept_ = estimator.intercept_
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 17229a87fdafb..2199eda823b47 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -674,6 +674,18 @@ def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
     assert not hasattr(ridge, "cv_values_")
 
 
+@pytest.mark.parametrize(
+    "ridge, make_dataset",
+    [(RidgeCV(store_cv_values=False), make_regression),
+     (RidgeClassifierCV(store_cv_values=False), make_classification)]
+)
+def test_ridge_best_score(ridge, make_dataset):
+    # check that the best_score_ is store
+    X, y = make_dataset(n_samples=6, random_state=42)
+    ridge.fit(X, y)
+    assert hasattr(ridge, "best_score_")
+
+
 def _test_ridge_diabetes(filter_):
     ridge = Ridge(fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)

From 32d46ee8fce4c5965f8c8f5f2f16d42bdc4194a6 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 18 Nov 2019 17:02:33 +0100
Subject: [PATCH 4/8] DOC add attributes in docstring

---
 sklearn/linear_model/_ridge.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 2d2e1dd27b5e5..54c401c1a84d4 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1695,6 +1695,9 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
     alpha_ : float
         Estimated regularization parameter.
 
+    best_score_ : float
+        Score of best_estimator_ on the hold out data.
+
     Examples
     --------
     >>> from sklearn.datasets import load_diabetes
@@ -1799,6 +1802,9 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
     alpha_ : float
         Estimated regularization parameter
 
+    best_score_ : float
+        Score of best_estimator_ on the hold out data.
+
     classes_ : array of shape (n_classes,)
         The classes labels.
 

From 2884150bdc88c1a56d52df175db5679c0f3b3c45 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 9 Dec 2019 16:45:54 +0100
Subject: [PATCH 5/8] doc

---
 doc/whats_new/v0.22.rst | 11 -----------
 doc/whats_new/v0.23.rst |  4 ++++
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index 5b9802328a7a9..af08b832e9f6f 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -548,17 +548,6 @@ Changelog
 - |Fix| The liblinear solver now supports ``sample_weight``.
   :pr:`15038` by `Guillaume Lemaitre`_.
 
-- |Efficiency| :class:`linear_model.RidgeCV` and
-  :class:`linear_model.RidgeClassifierCV` now does not allocate a
-  potentially large array to store dual coefficients for all hyperparameters
-  during its `fit`, nor an array to store all LOO predictions unless
-  `store_cv_values` is `True`.
-  :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
-
-- |Fix| add `best_score_` attribute to :class:`linear_model.RidgeCV` and
-  :class:`linear_model.RidgeClassifierCV`.
-  :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`.
-
 :mod:`sklearn.manifold`
 .......................
 
diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst
index 3de4c40e109c7..e0efe3217841d 100644
--- a/doc/whats_new/v0.23.rst
+++ b/doc/whats_new/v0.23.rst
@@ -73,6 +73,10 @@ Changelog
   `store_cv_values` is `True`.
   :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
 
+- |Fix| Add `best_score_` attribute to :class:`linear_model.RidgeCV` and
+  :class:`linear_model.RidgeClassifierCV`.
+  :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`.
+
 :mod:`sklearn.preprocessing`
 ............................
 

From 37ba6ec65d5d469e7d424fb8ba2049e4f9a94c38 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 9 Dec 2019 18:20:50 +0100
Subject: [PATCH 6/8] fix docstring

---
 sklearn/linear_model/_ridge.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 54c401c1a84d4..92a661133b8ea 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1696,7 +1696,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
         Estimated regularization parameter.
 
     best_score_ : float
-        Score of best_estimator_ on the hold out data.
+        Score of base estimator with best alpha on the hold out data.
 
     Examples
     --------
@@ -1800,10 +1800,10 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
         ``fit_intercept = False``.
 
     alpha_ : float
-        Estimated regularization parameter
+        Estimated regularization parameter.
 
     best_score_ : float
-        Score of best_estimator_ on the hold out data.
+        Score of base estimator with best alpha on the hold out data.
 
     classes_ : array of shape (n_classes,)
         The classes labels.

From 92ce5d049defeaf30b5a2b2ac276c3f21551550c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 10 Dec 2019 11:31:32 +0100
Subject: [PATCH 7/8] check dtype for best_score_

---
 sklearn/linear_model/tests/test_ridge.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 814e063ea10e4..a92e830aba66e 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -676,14 +676,17 @@ def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
 
 @pytest.mark.parametrize(
     "ridge, make_dataset",
-    [(RidgeCV(store_cv_values=False), make_regression),
-     (RidgeClassifierCV(store_cv_values=False), make_classification)]
+    [(RidgeCV(), make_regression),
+     (RidgeClassifierCV(), make_classification)]
 )
-def test_ridge_best_score(ridge, make_dataset):
+@pytest.mark.parametrize("cv", [None, 3])
+def test_ridge_best_score(ridge, make_dataset, cv):
     # check that the best_score_ is store
     X, y = make_dataset(n_samples=6, random_state=42)
+    ridge.set_params(store_cv_values=False, cv=cv)
     ridge.fit(X, y)
     assert hasattr(ridge, "best_score_")
+    assert isinstance(ridge.best_score_, float)
 
 
 def _test_ridge_diabetes(filter_):

From 4dbf6cdaf6c9edaa9e0c179d3a98ece229142069 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 10 Dec 2019 16:16:58 +0100
Subject: [PATCH 8/8] docstring

---
 sklearn/linear_model/_ridge.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 92a661133b8ea..3b54a4eb5b3cb 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1696,7 +1696,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
         Estimated regularization parameter.
 
     best_score_ : float
-        Score of base estimator with best alpha on the hold out data.
+        Mean cross-validated score of the estimator with the best alpha found.
 
     Examples
     --------
@@ -1803,7 +1803,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV):
         Estimated regularization parameter.
 
     best_score_ : float
-        Score of base estimator with best alpha on the hold out data.
+        Mean cross-validated score of the estimator with the best alpha found.
 
     classes_ : array of shape (n_classes,)
         The classes labels.