Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions sklearn/linear_model/ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils import safe_asarray
from ..preprocessing import LabelBinarizer
from ..preprocessing import LabelBinarizer, StandardScaler, add_dummy_feature
from ..grid_search import GridSearchCV


Expand Down Expand Up @@ -192,15 +192,31 @@ def fit(self, X, y, sample_weight=1.0, solver=None):
X = safe_asarray(X, dtype=np.float)
y = np.asarray(y, dtype=np.float)

X, y, X_mean, y_mean, X_std = self._center_data(
X, y, self.fit_intercept, self.normalize, self.copy_X)

is_sparse = not hasattr(X, '__array__')
if self.fit_intercept and is_sparse:
if self.normalize:
scaler = StandardScaler(with_mean=False, with_std=True,
copy=False)
X = scaler.fit_transform(X)
X_std = scaler.std_
else:
X_std = np.ones(X.shape[1])
X_mean = np.zeros(X.shape[1])
y_mean = 0.
X = add_dummy_feature(X, value=1.0) # TODO: intercept weight
else:
X, y, X_mean, y_mean, X_std = self._center_data(
X, y, self.fit_intercept, self.normalize, self.copy_X)
self.coef_ = ridge_regression(X, y,
alpha=self.alpha,
sample_weight=sample_weight,
solver=solver,
max_iter=self.max_iter,
tol=self.tol)
if self.fit_intercept and is_sparse:
coef = np.atleast_2d(self.coef_)
self.coef_ = coef[:, 1:].squeeze()
y_mean = coef[:, 0].squeeze()
self._set_intercept(X_mean, y_mean, X_std)
return self

Expand Down
33 changes: 26 additions & 7 deletions sklearn/linear_model/tests/test_ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def _test_ridge_loo(filter_):
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T,
Y_pred, decimal=5)

return ret
yield ret


def _test_ridge_cv(filter_):
Expand All @@ -256,7 +256,7 @@ def _test_ridge_cv(filter_):
def _test_ridge_diabetes(filter_):
    """Yield the rounded score of a Ridge fit without intercept.

    ``filter_`` is applied to ``X_diabetes`` before both fitting and
    scoring, so the same helper can be run on different input
    representations.  The single yielded value is the score rounded to
    five decimals.
    """
    ridge = Ridge(fit_intercept=False)
    ridge.fit(filter_(X_diabetes), y_diabetes)
    # Only ``yield`` here: a ``return <value>`` placed before it would make
    # the yield unreachable, and the generator would produce no result.
    yield np.round(ridge.score(filter_(X_diabetes), y_diabetes), 5)


def _test_multi_ridge_diabetes(filter_):
Expand Down Expand Up @@ -303,20 +303,39 @@ def _test_tolerance(filter_):
assert_true(score >= score2)


def _test_intercept_values(filter_):
    """Yield coefficients and intercepts of Ridge fits with an intercept.

    Used to check that the learned model doesn't change if the array is
    sparse (issue #1389).  For a tall (5 x 3) and a wide (3 x 5) random
    problem, and for ``normalize`` both off and on, a Ridge model is fitted
    on ``filter_(X)`` and its ``coef_`` and then its ``intercept_`` are
    yielded.
    """
    shapes = ((5, 3), (3, 5))  # tall first, then wide
    for n_samples, n_features in shapes:
        # Re-seed for every shape so the data is reproducible.
        random_state = np.random.RandomState(0)
        features = random_state.randn(n_samples, n_features)
        targets = random_state.randn(n_samples)
        for use_normalize in (False, True):
            # Almost no regularization, but not singular.
            model = Ridge(alpha=1e-10, fit_intercept=True,
                          normalize=use_normalize)
            model.fit(filter_(features), targets)
            yield model.coef_
            yield model.intercept_


def test_dense_sparse():
    """Check that helpers agree on dense and sparse input.

    Each helper is called once with ``DENSE_FILTER`` and once with
    ``SPARSE_FILTER``.  Helpers that return ``None`` are not compared;
    helpers that produce results have them compared pairwise to three
    decimals.
    """
    helpers = (_test_ridge_loo,
               _test_ridge_cv,
               _test_ridge_diabetes,
               _test_multi_ridge_diabetes,
               _test_ridge_classifiers,
               _test_tolerance,
               _test_intercept_values)
    for test_func in helpers:
        # test dense matrix
        dense_returns = test_func(DENSE_FILTER)
        # test sparse matrix
        sparse_returns = test_func(SPARSE_FILTER)
        if dense_returns is None:
            continue
        # Materialise both result streams before comparing: ``zip`` alone
        # stops at the shorter one, which would hide a helper that yields
        # fewer results for one representation and would leave the other
        # generator unfinished.
        dense_returns = list(dense_returns)
        sparse_returns = list(sparse_returns)
        assert_true(len(dense_returns) == len(sparse_returns))
        # test that the outputs are the same
        for ret_dense, ret_sparse in zip(dense_returns, sparse_returns):
            assert_array_almost_equal(ret_dense, ret_sparse, decimal=3)


def test_class_weights():
Expand Down