10 changes: 8 additions & 2 deletions doc/whats_new/v0.23.rst
@@ -56,7 +56,7 @@ Changelog

 - |Enhancement| Functions :func:`datasets.make_circles` and
   :func:`datasets.make_moons` now accept two-element tuple.
-  :pr:`15707` by :user:`Maciej J Mikulski <mjmikulski>`
+  :pr:`15707` by :user:`Maciej J Mikulski <mjmikulski>`.

 :mod:`sklearn.linear_model`
 ...........................
@@ -66,6 +66,13 @@ Changelog
   the wrapped `base_estimator` during the fitting of the final model.
   :pr:`15573` by :user:`Jeremy Alexandre <J-A16>`.

+- |Efficiency| :class:`linear_model.RidgeCV` and
+  :class:`linear_model.RidgeClassifierCV` no longer allocate a
+  potentially large array to store dual coefficients for all hyperparameters
+  during their `fit`, nor an array to store all error or LOO predictions
+  unless `store_cv_values` is `True`.
+  :pr:`15652` by :user:`Jérôme Dockès <jeromedockes>`.
+
 :mod:`sklearn.preprocessing`
 ............................

Expand All @@ -78,4 +85,3 @@ Changelog
- |Fix| :func:`tree.plot_tree` `rotate` parameter was unused and has been
deprecated.
:pr:`15806` by :user:`Chiara Marmo <cmarmo>`.

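To illustrate the |Efficiency| entry above, a minimal sketch of the new behaviour (the dataset and alpha grid here are arbitrary, chosen only for the demonstration):

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import RidgeCV

    X, y = make_regression(n_samples=100, n_features=10, random_state=0)
    alphas = [0.1, 1.0, 10.0]

    # Default: the leave-one-out results are reduced on the fly, so no
    # (n_samples, n_alphas) array survives the fit.
    ridge = RidgeCV(alphas=alphas).fit(X, y)
    assert not hasattr(ridge, "cv_values_")

    # Opting in keeps the per-alpha leave-one-out values.
    ridge = RidgeCV(alphas=alphas, store_cv_values=True).fit(X, y)
    print(ridge.cv_values_.shape)  # (100, 3)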
63 changes: 37 additions & 26 deletions sklearn/linear_model/_ridge.py
@@ -1054,6 +1054,16 @@ def _matmat(self, v):
         return res


+class _IdentityEstimator:
+    """Hack to call a scorer when we already have the predictions."""
+
+    def decision_function(self, y_predict):
+        return y_predict
+
+    def predict(self, y_predict):
+        return y_predict
+
+
 class _RidgeGCV(LinearModel):
     """Ridge regression with built-in Generalized Cross-Validation

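A scorer obtained from `check_scoring` has the signature `scorer(estimator, X, y)` and internally calls `estimator.predict(X)` (or `decision_function`). Passing `_IdentityEstimator` with `X` set to precomputed predictions therefore makes the scorer grade those predictions directly. A minimal sketch of that interaction, using the public `get_scorer` helper:

    import numpy as np
    from sklearn.metrics import get_scorer

    y_true = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.1, 1.9, 3.2])

    scorer = get_scorer('neg_mean_squared_error')
    # estimator.predict(y_pred) returns y_pred unchanged, so the scorer
    # simply compares the precomputed predictions against y_true.
    score = scorer(_IdentityEstimator(), y_pred, y_true)
    print(score)  # -mean((y_true - y_pred) ** 2)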
@@ -1087,6 +1097,10 @@ class _RidgeGCV(LinearModel):

     looe = y - loov = c / diag(G^-1)

+    The best score (negative mean squared error or user-provided scoring) is
+    stored in the `best_score_` attribute, and the selected hyperparameter in
+    `alpha_`.
+
     References
     ----------
     http://cbcl.mit.edu/publications/ps/MIT-CSAIL-TR-2007-025.pdf
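The `looe` identity quoted above can be checked numerically: with G = X.X^T + alpha*Id and c = G^-1 y, the leave-one-out residuals of ridge regression equal c / diag(G^-1). A brute-force sketch of that check (ignoring the intercept centering and sample-weight handling that `_RidgeGCV` layers on top):

    import numpy as np

    rng = np.random.RandomState(0)
    X, y = rng.randn(20, 5), rng.randn(20)
    alpha = 1.0

    G = X @ X.T + alpha * np.eye(len(y))   # G = X.X^T + alpha*Id
    G_inv = np.linalg.inv(G)
    c = G_inv @ y                          # dual coefficients
    looe = c / np.diag(G_inv)              # closed-form LOO residuals

    # Compare with explicitly refitting on each leave-one-out split.
    for i in range(len(y)):
        mask = np.arange(len(y)) != i
        w = np.linalg.solve(X[mask].T @ X[mask] + alpha * np.eye(5),
                            X[mask].T @ y[mask])
        assert np.isclose(y[i] - X[i] @ w, looe[i])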
@@ -1462,43 +1476,40 @@ def fit(self, X, y, sample_weight=None):
         else:
             sqrt_sw = np.ones(X.shape[0], dtype=X.dtype)

-        X_mean, *decomposition = decompose(X, y, sqrt_sw)
-
         scorer = check_scoring(self, scoring=self.scoring, allow_none=True)
         error = scorer is None

         n_y = 1 if len(y.shape) == 1 else y.shape[1]
-        cv_values = np.zeros((n_samples * n_y, len(self.alphas)),
-                             dtype=X.dtype)
-        C = []
+
+        X_mean, *decomposition = decompose(X, y, sqrt_sw)
+
+        if self.store_cv_values:
+            self.cv_values_ = np.empty(
+                (n_samples * n_y, len(self.alphas)), dtype=X.dtype)
+
+        best_coef, best_score, best_alpha = None, None, None

         for i, alpha in enumerate(self.alphas):
             G_inverse_diag, c = solve(
                 float(alpha), y, sqrt_sw, X_mean, *decomposition)
             if error:
                 squared_errors = (c / G_inverse_diag) ** 2
-                cv_values[:, i] = squared_errors.ravel()
+                alpha_score = -squared_errors.mean()
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = squared_errors.ravel()
             else:
                 predictions = y - (c / G_inverse_diag)
-                cv_values[:, i] = predictions.ravel()
-            C.append(c)
+                alpha_score = scorer(
+                    _IdentityEstimator(), predictions.ravel(), y.ravel())
+                if self.store_cv_values:
+                    self.cv_values_[:, i] = predictions.ravel()

-        if error:
-            best = cv_values.mean(axis=0).argmin()
-        else:
-            # The scorer want an object that will make the predictions but
-            # they are already computed efficiently by _RidgeGCV. This
-            # identity_estimator will just return them
-            def identity_estimator():
-                pass
-            identity_estimator.decision_function = lambda y_predict: y_predict
-            identity_estimator.predict = lambda y_predict: y_predict
-
-            # signature of scorer is (estimator, X, y)
-            out = [scorer(identity_estimator, cv_values[:, i], y.ravel())
-                   for i in range(len(self.alphas))]
-            best = np.argmax(out)
+            if (best_score is None) or (alpha_score > best_score):
+                best_coef, best_score, best_alpha = c, alpha_score, alpha

-        self.alpha_ = self.alphas[best]
-        self.dual_coef_ = C[best]
+        self.alpha_ = best_alpha
+        self.best_score_ = best_score
+        self.dual_coef_ = best_coef
         self.coef_ = safe_sparse_dot(self.dual_coef_.T, X)

         X_offset += X_mean * X_scale
@@ -1509,7 +1520,7 @@ def identity_estimator():
                 cv_values_shape = n_samples, len(self.alphas)
             else:
                 cv_values_shape = n_samples, n_y, len(self.alphas)
-            self.cv_values_ = cv_values.reshape(cv_values_shape)
+            self.cv_values_ = self.cv_values_.reshape(cv_values_shape)

         return self

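The heart of this refactor: instead of materializing one column of LOO results per alpha plus every dual coefficient vector and reducing at the end, `fit` now reduces on the fly and keeps only the running best. A standalone sketch of the pattern (names here are illustrative, not the library's API):

    import numpy as np

    def select_best_alpha(alphas, loo_squared_errors_for):
        """Keep a running best instead of an (n_samples, n_alphas) buffer.

        `loo_squared_errors_for(alpha)` is assumed to return the vector of
        leave-one-out squared errors for one candidate alpha.
        """
        best_alpha, best_score = None, None
        for alpha in alphas:
            score = -loo_squared_errors_for(alpha).mean()  # higher is better
            if best_score is None or score > best_score:
                best_alpha, best_score = alpha, score
        return best_alpha, best_score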
21 changes: 18 additions & 3 deletions sklearn/linear_model/tests/test_ridge.py
@@ -34,6 +34,7 @@
 from sklearn.linear_model._ridge import _check_gcv_mode
 from sklearn.linear_model._ridge import _X_CenterStackOp
 from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification

 from sklearn.model_selection import GridSearchCV
 from sklearn.model_selection import KFold, GroupKFold, cross_val_predict
@@ -661,6 +662,19 @@ def _test_ridge_cv(filter_):
     assert type(ridge_cv.intercept_) == np.float64


+@pytest.mark.parametrize(
+    "ridge, make_dataset",
+    [(RidgeCV(), make_regression),
+     (RidgeClassifierCV(), make_classification)]
+)
+def test_ridge_gcv_cv_values_not_stored(ridge, make_dataset):
+    # Check that `cv_values_` is not stored when store_cv_values is False
+    X, y = make_dataset(n_samples=6, random_state=42)
+    ridge.set_params(store_cv_values=False)
+    ridge.fit(X, y)
+    assert not hasattr(ridge, "cv_values_")
+
+
 def _test_ridge_diabetes(filter_):
     ridge = Ridge(fit_intercept=False)
     ridge.fit(filter_(X_diabetes), y_diabetes)

[Review comment on this test, from a contributor] Note that the previous
implementation would also have passed this test, because the cv values were
stored in a local variable during fit.

[Reply from the author] True, and we don't have a really good test to detect
the enhancement, actually.
@@ -818,7 +832,8 @@ def test_class_weights_cv():
     assert_array_equal(reg.predict([[-.2, 2]]), np.array([-1]))


-def test_ridgecv_store_cv_values():
+@pytest.mark.parametrize("scoring", [None, 'neg_mean_squared_error'])
+def test_ridgecv_store_cv_values(scoring):
     rng = np.random.RandomState(42)

     n_samples = 8
@@ -827,7 +842,7 @@ def test_ridgecv_store_cv_values():
     alphas = [1e-1, 1e0, 1e1]
     n_alphas = len(alphas)

-    r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True)
+    r = RidgeCV(alphas=alphas, cv=None, store_cv_values=True, scoring=scoring)

     # with len(y.shape) == 1
     y = rng.randn(n_samples)
@@ -840,7 +855,7 @@ def test_ridgecv_store_cv_values():
     r.fit(x, y)
     assert r.cv_values_.shape == (n_samples, n_targets, n_alphas)

-    r = RidgeCV(cv=3, store_cv_values=True)
+    r = RidgeCV(cv=3, store_cv_values=True, scoring=scoring)
     assert_raises_regex(ValueError, 'cv!=None and store_cv_values',
                         r.fit, x, y)

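One subtlety the `scoring` parametrization exercises: per the `fit` loop above, `cv_values_` holds leave-one-out squared errors when `scoring` is None, but leave-one-out predictions when a scorer is supplied. The shapes asserted in the test are the same either way. A short sketch mirroring the test setup:

    import numpy as np
    from sklearn.linear_model import RidgeCV

    rng = np.random.RandomState(42)
    x, y = rng.randn(8, 5), rng.randn(8)
    alphas = [1e-1, 1e0, 1e1]

    # scoring=None: cv_values_ contains LOO squared errors.
    r = RidgeCV(alphas=alphas, store_cv_values=True).fit(x, y)
    assert r.cv_values_.shape == (8, 3)

    # With a scorer: same shape, but the entries are LOO predictions.
    r = RidgeCV(alphas=alphas, store_cv_values=True,
                scoring='neg_mean_squared_error').fit(x, y)
    assert r.cv_values_.shape == (8, 3)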