[MRG+1] remove n_nonzero_coefs from attr of LassoLarsCV + clean up call hierarchy #9004
Changes from all commits: 8209200, 1c2f4da, 2da8e32, 4fd1f4a, 6f2c2a0, 70338a0
```diff
@@ -195,7 +195,6 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
             sys.stdout.write('.')
             sys.stdout.flush()

-    tiny = np.finfo(np.float).tiny  # to avoid division by 0 warning
     tiny32 = np.finfo(np.float32).tiny  # to avoid division by 0 warning
     equality_tolerance = np.finfo(np.float32).eps
```
```diff
@@ -300,9 +299,11 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
                           'Dropping a regressor, after %i iterations, '
                           'i.e. alpha=%.3e, '
                           'with an active set of %i regressors, and '
-                          'the smallest cholesky pivot element being %.3e'
+                          'the smallest cholesky pivot element being %.3e.'
+                          ' Reduce max_iter or increase eps parameters.'
                           % (n_iter, alpha, n_active, diag),
                           ConvergenceWarning)

             # XXX: need to figure a 'drop for good' way
             Cov = Cov_not_shortened
             Cov[0] = 0
```
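For context, this is the warning a user sees when a degenerate Cholesky pivot forces a regressor to be dropped; the amended message now tells them what to do about it. A minimal sketch of following that advice (the collinear toy data and the retry value of `eps` are assumptions for illustration, not from this PR; whether the warning fires depends on the data):

```python
import warnings
import numpy as np
from sklearn import linear_model
from sklearn.exceptions import ConvergenceWarning

rng = np.random.RandomState(0)
X = rng.randn(20, 10)
X[:, 5] = X[:, 4]                 # duplicate a column to provoke a degenerate pivot
y = X[:, 4] + 0.01 * rng.randn(20)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always', ConvergenceWarning)
    linear_model.lars_path(X, y, method='lasso')

if any(issubclass(w.category, ConvergenceWarning) for w in caught):
    # retry with a larger eps, as the new message suggests
    linear_model.lars_path(X, y, method='lasso', eps=1e-8)
```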
```diff
@@ -370,11 +371,11 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
             corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
                                  least_squares)

-        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
+        g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))
         if positive:
             gamma_ = min(g1, C / AA)
         else:
-            g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
+            g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))
             gamma_ = min(g1, g2, C / AA)

         # TODO: better names for these variables: z
```

Review comment: tiny was not enough for #8475 and I think that 1.1754944e-38 is small enough (rather than 2.2250738585072014e-308).
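To see why the float64 `tiny` is too small a guard here, compare the two epsilons on a float32 denominator that has underflowed to zero. A minimal sketch (toy values, not from the PR), assuming float32 arithmetic as in #8475:

```python
import numpy as np

tiny64 = np.finfo(np.float64).tiny   # ~2.2250738585072014e-308
tiny32 = np.finfo(np.float32).tiny   # ~1.1754944e-38

denom = np.float32(0.0)              # a denominator that underflowed to 0

# tiny64 rounds back to 0 when the sum is stored as float32, so the
# guard does nothing and the division still blows up.
print(np.float32(denom + tiny64))                     # 0.0
print(np.float32(1.0) / np.float32(denom + tiny64))   # inf, plus a RuntimeWarning

# tiny32 is representable in float32, so the quotient stays finite.
print(np.float32(denom + tiny32))                     # 1.1754944e-38
print(np.float32(1.0) / np.float32(denom + tiny32))   # ~8.5e+37, within float32 range
```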
```diff
@@ -608,28 +609,8 @@ def _get_gram(self):
             Gram = None
         return Gram

-    def fit(self, X, y, Xy=None):
-        """Fit the model using X, y as training data.
-
-        parameters
-        ----------
-        X : array-like, shape (n_samples, n_features)
-            Training data.
-
-        y : array-like, shape (n_samples,) or (n_samples, n_targets)
-            Target values.
-
-        Xy : array-like, shape (n_samples,) or (n_samples, n_targets), \
-                optional
-            Xy = np.dot(X.T, y) that can be precomputed. It is useful
-            only when the Gram matrix is precomputed.
-
-        returns
-        -------
-        self : object
-            returns an instance of self.
-        """
-        X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
+    def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None):
+        """Auxiliary method to fit the model using X, y as training data"""
         n_features = X.shape[1]

         X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y,
```
```diff
@@ -642,13 +623,6 @@ def fit(self, X, y, Xy=None):

         n_targets = y.shape[1]

-        alpha = getattr(self, 'alpha', 0.)
-        if hasattr(self, 'n_nonzero_coefs'):
-            alpha = 0.  # n_nonzero_coefs parametrization takes priority
-            max_iter = self.n_nonzero_coefs
-        else:
-            max_iter = self.max_iter
-
         precompute = self.precompute
         if not hasattr(precompute, '__array__') and (
                 precompute is True or
```

Review comment: this has been moved to Lars.fit to avoid the magic of hacking instance attributes.
```diff
@@ -662,7 +636,7 @@ def fit(self, X, y, Xy=None):
         self.n_iter_ = []
         self.coef_ = np.empty((n_targets, n_features))

-        if self.fit_path:
+        if fit_path:
             self.active_ = []
             self.coef_path_ = []
             for k in xrange(n_targets):
```
```diff
@@ -698,9 +672,45 @@ def fit(self, X, y, Xy=None):
         if n_targets == 1:
             self.alphas_ = self.alphas_[0]
             self.n_iter_ = self.n_iter_[0]

         self._set_intercept(X_offset, y_offset, X_scale)
         return self

+    def fit(self, X, y, Xy=None):
+        """Fit the model using X, y as training data.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+
+        y : array-like, shape (n_samples,) or (n_samples, n_targets)
+            Target values.
+
+        Xy : array-like, shape (n_samples,) or (n_samples, n_targets), \
+                optional
+            Xy = np.dot(X.T, y) that can be precomputed. It is useful
+            only when the Gram matrix is precomputed.
+
+        Returns
+        -------
+        self : object
+            returns an instance of self.
+        """
+        X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
+
+        alpha = getattr(self, 'alpha', 0.)
+        if hasattr(self, 'n_nonzero_coefs'):
+            alpha = 0.  # n_nonzero_coefs parametrization takes priority
+            max_iter = self.n_nonzero_coefs
+        else:
+            max_iter = self.max_iter
+
+        self._fit(X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path,
+                  Xy=Xy)
+
+        return self


 class LassoLars(Lars):
     """Lasso model fit with Least Angle Regression a.k.a. Lars
```
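With the dispatch now explicit in the public `fit`, the two parametrizations are easy to observe from the outside. A quick sketch (random toy data assumed, for illustration only):

```python
import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
X = rng.randn(50, 10)
y = X[:, 0] + 0.1 * rng.randn(50)

# Lars defines n_nonzero_coefs, so fit() forwards alpha=0. and
# max_iter=n_nonzero_coefs down to _fit.
lars = linear_model.Lars(n_nonzero_coefs=3).fit(X, y)
print(np.sum(lars.coef_ != 0))   # at most 3 active coefficients

# LassoLars has no n_nonzero_coefs, so its alpha and max_iter pass
# through to _fit unchanged.
lasso = linear_model.LassoLars(alpha=0.01).fit(X, y)
print(np.sum(lasso.coef_ != 0))
```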
```diff
@@ -1145,10 +1155,13 @@ def fit(self, X, y):
         # Now compute the full model
         # it will call a lasso internally when self if LassoLarsCV
         # as self.method == 'lasso'
-        Lars.fit(self, X, y)
+        self._fit(X, y, max_iter=self.max_iter, alpha=best_alpha,
+                  Xy=None, fit_path=True)
         return self

     @property
+    @deprecated("Attribute alpha is deprecated in 0.19 and "
+                "will be removed in 0.21. See 'alpha_' instead")
     def alpha(self):
         # impedance matching for the above Lars.fit (should not be documented)
         return self.alpha_
```
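Since the final refit now receives `best_alpha` explicitly, the `alpha` property is only kept for backward compatibility and warns on access. A small sketch of the deprecation from the user's side (the diabetes dataset is just an assumed choice for the demo):

```python
import warnings
from sklearn import linear_model
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
model = linear_model.LassoLarsCV().fit(X, y)

print(model.alpha_)              # the cross-validated penalty: the real API
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    value = model.alpha          # still works, but now emits a warning
print(value == model.alpha_, caught[0].category.__name__)
```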
```diff
@@ -1283,6 +1296,24 @@ class LassoLarsCV(LarsCV):

     method = 'lasso'

+    def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
+                 normalize=True, precompute='auto', cv=None,
+                 max_n_alphas=1000, n_jobs=1, eps=np.finfo(np.float).eps,
+                 copy_X=True, positive=False):
+        self.fit_intercept = fit_intercept
+        self.verbose = verbose
+        self.max_iter = max_iter
+        self.normalize = normalize
+        self.precompute = precompute
+        self.cv = cv
+        self.max_n_alphas = max_n_alphas
+        self.n_jobs = n_jobs
+        self.eps = eps
+        self.copy_X = copy_X
+        self.positive = positive
+        # XXX : we don't use super(LarsCV, self).__init__
+        # to avoid setting n_nonzero_coefs
+

 class LassoLarsIC(LassoLars):
     """Lasso model fit with Lars using BIC or AIC for model selection
```

Review comment: Technically it tells us that we have the wrong inheritance diagram. But... I don't bother.
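Because the new `__init__` bypasses `Lars.__init__`, the attribute that drives the `hasattr` dispatch in `fit` simply never exists on `LassoLarsCV`. An illustrative check:

```python
from sklearn import linear_model

print(hasattr(linear_model.Lars(), 'n_nonzero_coefs'))         # True
print(hasattr(linear_model.LassoLarsCV(), 'n_nonzero_coefs'))  # False
```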
In the test file:

```diff
@@ -1,10 +1,13 @@
+import warnings
+
 import numpy as np
 from scipy import linalg

 from sklearn.model_selection import train_test_split
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_true
+from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
```
```diff
@@ -402,6 +405,19 @@ def test_lars_cv():
         lars_cv.fit(X, y)
         np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
         old_alpha = lars_cv.alpha_
+    assert_false(hasattr(lars_cv, 'n_nonzero_coefs'))
+
+
+def test_lars_cv_max_iter():
+    with warnings.catch_warnings(record=True) as w:
+        X = diabetes.data
+        y = diabetes.target
+        rng = np.random.RandomState(42)
+        x = rng.randn(len(y))
+        X = np.c_[X, x, x]  # add correlated features
+        lars_cv = linear_model.LassoLarsCV(max_iter=5)
+        lars_cv.fit(X, y)
+        assert_true(len(w) == 0)


 def test_lasso_lars_ic():
```

Review comment: maybe a test for the ignored max_iter?

Reply: done
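The behaviour the new test pins down can also be seen through the public attributes: with the instance-attribute hack gone, a user-supplied `max_iter` bounds the final fit instead of being silently replaced by an inherited `n_nonzero_coefs`. A sketch mirroring the test (the diabetes dataset is again just an assumed choice):

```python
from sklearn import linear_model
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
model = linear_model.LassoLarsCV(max_iter=5).fit(X, y)

# The final lars_path call now honours max_iter, so the iteration
# count of the refit stays within the requested budget.
print(model.n_iter_)   # <= 5
```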
Review comment: for the record, tiny32 was introduced in 294d4b6.