From 4b141b1b42f64a56158782fe1598c6875bf5256b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 18 Nov 2019 16:37:27 +0100 Subject: [PATCH 1/8] ENH do not allocate local arrays in Ridge*CV if store_cv_values is False --- sklearn/linear_model/_ridge.py | 63 ++++++++++++++---------- sklearn/linear_model/tests/test_ridge.py | 13 +++++ 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 9e1dd7f22085d..1c0407066048c 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1054,6 +1054,16 @@ def _matmat(self, v): return res +class _IdentityEstimator: + """Hack to call a scorer when we already have the predictions.""" + + def decision_function(self, y_predict): + return y_predict + + def predict(self, y_predict): + return y_predict + + class _RidgeGCV(LinearModel): """Ridge regression with built-in Generalized Cross-Validation @@ -1087,6 +1097,10 @@ class _RidgeGCV(LinearModel): looe = y - loov = c / diag(G^-1) + The best score (negative mean squared error or user-provided scoring) is + stored in the `best_score_` attribute, and the selected hyperparameter in + `alpha_`. 
+ References ---------- http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf @@ -1462,43 +1476,40 @@ def fit(self, X, y, sample_weight=None): else: sqrt_sw = np.ones(X.shape[0], dtype=X.dtype) + X_mean, *decomposition = decompose(X, y, sqrt_sw) + scorer = check_scoring(self, scoring=self.scoring, allow_none=True) error = scorer is None n_y = 1 if len(y.shape) == 1 else y.shape[1] - cv_values = np.zeros((n_samples * n_y, len(self.alphas)), - dtype=X.dtype) - C = [] - X_mean, *decomposition = decompose(X, y, sqrt_sw) + + if self.store_cv_values: + self.cv_values_ = np.empty( + (n_samples * n_y, len(self.alphas)), dtype=X.dtype) + + best_coef, best_score, best_alpha = None, None, None + for i, alpha in enumerate(self.alphas): G_inverse_diag, c = solve( float(alpha), y, sqrt_sw, X_mean, *decomposition) if error: squared_errors = (c / G_inverse_diag) ** 2 - cv_values[:, i] = squared_errors.ravel() + alpha_score = -squared_errors.mean() + if self.store_cv_values: + self.cv_values_[:, i] = squared_errors.ravel() else: predictions = y - (c / G_inverse_diag) - cv_values[:, i] = predictions.ravel() - C.append(c) + alpha_score = scorer( + _IdentityEstimator(), predictions.ravel(), y.ravel()) + if self.store_cv_values: + self.cv_values_[:, i] = predictions.ravel() - if error: - best = cv_values.mean(axis=0).argmin() - else: - # The scorer want an object that will make the predictions but - # they are already computed efficiently by _RidgeGCV. 
This - # identity_estimator will just return them - def identity_estimator(): - pass - identity_estimator.decision_function = lambda y_predict: y_predict - identity_estimator.predict = lambda y_predict: y_predict - - # signature of scorer is (estimator, X, y) - out = [scorer(identity_estimator, cv_values[:, i], y.ravel()) - for i in range(len(self.alphas))] - best = np.argmax(out) - - self.alpha_ = self.alphas[best] - self.dual_coef_ = C[best] + if (best_score is None) or (alpha_score > best_score): + best_coef, best_score, best_alpha = c, alpha_score, alpha + + self.alpha_ = best_alpha + self.best_score_ = best_score + self.dual_coef_ = best_coef self.coef_ = safe_sparse_dot(self.dual_coef_.T, X) X_offset += X_mean * X_scale @@ -1509,7 +1520,7 @@ def identity_estimator(): cv_values_shape = n_samples, len(self.alphas) else: cv_values_shape = n_samples, n_y, len(self.alphas) - self.cv_values_ = cv_values.reshape(cv_values_shape) + self.cv_values_ = self.cv_values_.reshape(cv_values_shape) return self diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index c786b154fcb85..17229a87fdafb 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -34,6 +34,7 @@ from sklearn.linear_model._ridge import _check_gcv_mode from sklearn.linear_model._ridge import _X_CenterStackOp from sklearn.datasets import make_regression +from sklearn.datasets import make_classification from sklearn.model_selection import GridSearchCV from sklearn.model_selection import KFold, GroupKFold, cross_val_predict @@ -661,6 +662,18 @@ def _test_ridge_cv(filter_): assert type(ridge_cv.intercept_) == np.float64 +@pytest.mark.parametrize( + "ridge, make_dataset", + [(RidgeCV(store_cv_values=False), make_regression), + (RidgeClassifierCV(store_cv_values=False), make_classification)] +) +def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset): + # Check that `cv_values_` is not stored when store_cv_values is 
False + X, y = make_dataset(n_samples=6, random_state=42) + ridge.fit(X, y) + assert not hasattr(ridge, "cv_values_") + + def _test_ridge_diabetes(filter_): ridge = Ridge(fit_intercept=False) ridge.fit(filter_(X_diabetes), y_diabetes) From 74d81d65c9beeb2971002d33c3b93e7d83980c6f Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 18 Nov 2019 16:41:01 +0100 Subject: [PATCH 2/8] add whats new --- doc/whats_new/v0.22.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 8f4dd76c08677..d82ae7fa5900f 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -532,6 +532,13 @@ Changelog - |Fix| The liblinear solver now supports ``sample_weight``. :pr:`15038` by :user:`Guillaume Lemaitre <glemaitre>`. +- |Efficiency| :class:`linear_model.RidgeCV` and + :class:`linear_model.RidgeClassifierCV` now does not allocate a + potentially large array to store dual coefficients for all hyperparameters + during its `fit`, nor an array to store all LOO predictions unless + `store_cv_values` is `True`. + :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`. + :mod:`sklearn.manifold` ....................... From 832df73d7d7e6541076fdcdbd6301f8859b533e0 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 18 Nov 2019 16:56:36 +0100 Subject: [PATCH 3/8] FIX add best_score_ to Ridge*CV estimators --- doc/whats_new/v0.22.rst | 4 ++++ sklearn/linear_model/_ridge.py | 2 ++ sklearn/linear_model/tests/test_ridge.py | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index d82ae7fa5900f..6fa24fc14a11d 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -539,6 +539,10 @@ Changelog `store_cv_values` is `True`. :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`. +- |Fix| add `best_score_` attribute to :class:`linear_model.RidgeCV` and + :class:`linear_model.RidgeClassifierCV`. + :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`. + :mod:`sklearn.manifold` ....................... 
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 1c0407066048c..2d2e1dd27b5e5 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1576,6 +1576,7 @@ def fit(self, X, y, sample_weight=None): store_cv_values=self.store_cv_values) estimator.fit(X, y, sample_weight=sample_weight) self.alpha_ = estimator.alpha_ + self.best_score_ = estimator.best_score_ if self.store_cv_values: self.cv_values_ = estimator.cv_values_ else: @@ -1591,6 +1592,7 @@ def fit(self, X, y, sample_weight=None): gs.fit(X, y, sample_weight=sample_weight) estimator = gs.best_estimator_ self.alpha_ = gs.best_estimator_.alpha + self.best_score_ = gs.best_score_ self.coef_ = estimator.coef_ self.intercept_ = estimator.intercept_ diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 17229a87fdafb..2199eda823b47 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -674,6 +674,18 @@ def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset): assert not hasattr(ridge, "cv_values_") +@pytest.mark.parametrize( + "ridge, make_dataset", + [(RidgeCV(store_cv_values=False), make_regression), + (RidgeClassifierCV(store_cv_values=False), make_classification)] +) +def test_ridge_best_score(ridge, make_dataset): + # check that the best_score_ is stored + X, y = make_dataset(n_samples=6, random_state=42) + ridge.fit(X, y) + assert hasattr(ridge, "best_score_") + + def _test_ridge_diabetes(filter_): ridge = Ridge(fit_intercept=False) ridge.fit(filter_(X_diabetes), y_diabetes) From 32d46ee8fce4c5965f8c8f5f2f16d42bdc4194a6 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 18 Nov 2019 17:02:33 +0100 Subject: [PATCH 4/8] DOC add attributes in docstring --- sklearn/linear_model/_ridge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 2d2e1dd27b5e5..54c401c1a84d4 100644 
--- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1695,6 +1695,9 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): alpha_ : float Estimated regularization parameter. + best_score_ : float + Score of best_estimator_ on the hold out data. + Examples -------- >>> from sklearn.datasets import load_diabetes @@ -1799,6 +1802,9 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): alpha_ : float Estimated regularization parameter + best_score_ : float + Score of best_estimator_ on the hold out data. + classes_ : array of shape (n_classes,) The classes labels. From 2884150bdc88c1a56d52df175db5679c0f3b3c45 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 9 Dec 2019 16:45:54 +0100 Subject: [PATCH 5/8] doc --- doc/whats_new/v0.22.rst | 11 ----------- doc/whats_new/v0.23.rst | 4 ++++ 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 5b9802328a7a9..af08b832e9f6f 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -548,17 +548,6 @@ Changelog - |Fix| The liblinear solver now supports ``sample_weight``. :pr:`15038` by `Guillaume Lemaitre`_. -- |Efficiency| :class:`linear_model.RidgeCV` and - :class:`linear_model.RidgeClassifierCV` now does not allocate a - potentially large array to store dual coefficients for all hyperparameters - during its `fit`, nor an array to store all LOO predictions unless - `store_cv_values` is `True`. - :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`. - -- |Fix| add `best_score_` attribute to :class:`linear_model.RidgeCV` and - :class:`linear_model.RidgeClassifierCV`. - :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`. - :mod:`sklearn.manifold` ....................... diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 3de4c40e109c7..e0efe3217841d 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -73,6 +73,10 @@ Changelog `store_cv_values` is `True`. 
:pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`. +- |Fix| add `best_score_` attribute to :class:`linear_model.RidgeCV` and + :class:`linear_model.RidgeClassifierCV`. + :pr:`15653` by :user:`Jérôme Dockès <jeromedockes>`. + :mod:`sklearn.preprocessing` ............................ From 37ba6ec65d5d469e7d424fb8ba2049e4f9a94c38 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 9 Dec 2019 18:20:50 +0100 Subject: [PATCH 6/8] fix docstring --- sklearn/linear_model/_ridge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 54c401c1a84d4..92a661133b8ea 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1696,7 +1696,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): Estimated regularization parameter. best_score_ : float - Score of best_estimator_ on the hold out data. + Score of base estimator with best alpha on the hold out data. Examples -------- @@ -1800,10 +1800,10 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): ``fit_intercept = False``. alpha_ : float - Estimated regularization parameter + Estimated regularization parameter. best_score_ : float - Score of best_estimator_ on the hold out data. + Score of base estimator with best alpha on the hold out data. classes_ : array of shape (n_classes,) The classes labels. 
From 92ce5d049defeaf30b5a2b2ac276c3f21551550c Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 10 Dec 2019 11:31:32 +0100 Subject: [PATCH 7/8] check dtype for best_score_ --- sklearn/linear_model/tests/test_ridge.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 814e063ea10e4..a92e830aba66e 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -676,14 +676,17 @@ def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset): @pytest.mark.parametrize( "ridge, make_dataset", - [(RidgeCV(store_cv_values=False), make_regression), - (RidgeClassifierCV(store_cv_values=False), make_classification)] + [(RidgeCV(), make_regression), + (RidgeClassifierCV(), make_classification)] ) -def test_ridge_best_score(ridge, make_dataset): +@pytest.mark.parametrize("cv", [None, 3]) +def test_ridge_best_score(ridge, make_dataset, cv): # check that the best_score_ is stored X, y = make_dataset(n_samples=6, random_state=42) + ridge.set_params(store_cv_values=False, cv=cv) ridge.fit(X, y) assert hasattr(ridge, "best_score_") + assert isinstance(ridge.best_score_, float) def _test_ridge_diabetes(filter_): From 4dbf6cdaf6c9edaa9e0c179d3a98ece229142069 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Tue, 10 Dec 2019 16:16:58 +0100 Subject: [PATCH 8/8] docstring --- sklearn/linear_model/_ridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 92a661133b8ea..3b54a4eb5b3cb 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -1696,7 +1696,7 @@ class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV): Estimated regularization parameter. best_score_ : float - Score of base estimator with best alpha on the hold out data. 
+ Mean cross-validated score of the estimator with the best alpha found. Examples -------- @@ -1803,7 +1803,7 @@ class RidgeClassifierCV(LinearClassifierMixin, _BaseRidgeCV): Estimated regularization parameter. best_score_ : float - Score of base estimator with best alpha on the hold out data. + Mean cross-validated score of the estimator with the best alpha found. classes_ : array of shape (n_classes,) The classes labels.