
[MRG+1] remove n_nonzero_coefs from attr of LassoLarsCV + clean up call hierarchy #9004


Merged: 6 commits, Jun 10, 2017
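
In short, the user-visible effect (a sketch inferred from the tests added below; `make_regression` is only used here for illustration):

```python
from sklearn.datasets import make_regression
from sklearn import linear_model

X, y = make_regression(n_samples=50, n_features=10, random_state=0)

# max_iter is honored instead of being silently overridden by an
# inherited n_nonzero_coefs attribute, which is gone altogether
lcv = linear_model.LassoLarsCV(max_iter=5).fit(X, y)
assert not hasattr(lcv, 'n_nonzero_coefs')
```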
101 changes: 66 additions & 35 deletions sklearn/linear_model/least_angle.py
@@ -195,7 +195,6 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
sys.stdout.write('.')
sys.stdout.flush()

- tiny = np.finfo(np.float).tiny # to avoid division by 0 warning
tiny32 = np.finfo(np.float32).tiny # to avoid division by 0 warning
Member Author:

For the record, tiny32 was introduced in 294d4b6.

equality_tolerance = np.finfo(np.float32).eps

@@ -300,9 +299,11 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
'Dropping a regressor, after %i iterations, '
'i.e. alpha=%.3e, '
'with an active set of %i regressors, and '
- 'the smallest cholesky pivot element being %.3e'
+ 'the smallest cholesky pivot element being %.3e.'
+ ' Reduce max_iter or increase eps parameters.'
% (n_iter, alpha, n_active, diag),
ConvergenceWarning)

# XXX: need to figure a 'drop for good' way
Cov = Cov_not_shortened
Cov[0] = 0
@@ -370,11 +371,11 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
corr_eq_dir = np.dot(Gram[:n_active, n_active:].T,
least_squares)

- g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
+ g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny32))
Member Author:

tiny was not enough for #8475, and I think that 1.1754944e-38 (float32 tiny) is small enough, rather than 2.2250738585072014e-308 (float64 tiny).
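
For reference, the two values in question (a quick check with NumPy's finfo; each `tiny` is the smallest positive normalized value of its type):

```python
import numpy as np

print(np.finfo(np.float64).tiny)  # 2.2250738585072014e-308
print(np.finfo(np.float32).tiny)  # 1.1754944e-38

# for comparison, the equality_tolerance defined earlier in lars_path
print(np.finfo(np.float32).eps)   # ~1.1920929e-07
```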

if positive:
gamma_ = min(g1, C / AA)
else:
- g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
+ g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny32))
gamma_ = min(g1, g2, C / AA)

# TODO: better names for these variables: z
@@ -608,28 +609,8 @@ def _get_gram(self):
Gram = None
return Gram

- def fit(self, X, y, Xy=None):
- """Fit the model using X, y as training data.
-
- parameters
- ----------
- X : array-like, shape (n_samples, n_features)
- Training data.
-
- y : array-like, shape (n_samples,) or (n_samples, n_targets)
- Target values.
-
- Xy : array-like, shape (n_samples,) or (n_samples, n_targets), \
- optional
- Xy = np.dot(X.T, y) that can be precomputed. It is useful
- only when the Gram matrix is precomputed.
-
- returns
- -------
- self : object
- returns an instance of self.
- """
- X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
+ def _fit(self, X, y, max_iter, alpha, fit_path, Xy=None):
+ """Auxiliary method to fit the model using X, y as training data"""
n_features = X.shape[1]

X, y, X_offset, y_offset, X_scale = self._preprocess_data(X, y,
@@ -642,13 +623,6 @@ def fit(self, X, y, Xy=None):

n_targets = y.shape[1]

- alpha = getattr(self, 'alpha', 0.)
- if hasattr(self, 'n_nonzero_coefs'):
- alpha = 0. # n_nonzero_coefs parametrization takes priority
- max_iter = self.n_nonzero_coefs
- else:
- max_iter = self.max_iter
Member Author:

This has been moved to Lars.fit to avoid the magic of hacking instance attributes.
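
A minimal sketch of the resulting call hierarchy (toy classes, not the actual estimators): the public fit resolves the estimator-specific parametrization once, and _fit receives everything as explicit arguments, so a CV subclass can call _fit directly instead of faking instance attributes:

```python
class Toy(object):
    alpha = 0.1
    max_iter = 500

    def _fit(self, X, y, max_iter, alpha, fit_path):
        # all knobs arrive as arguments; no attribute sniffing here
        print('fitting with max_iter=%r, alpha=%r' % (max_iter, alpha))
        return self

    def fit(self, X, y):
        # parametrization is resolved in one place
        if hasattr(self, 'n_nonzero_coefs'):
            alpha, max_iter = 0., self.n_nonzero_coefs
        else:
            alpha, max_iter = self.alpha, self.max_iter
        return self._fit(X, y, max_iter=max_iter, alpha=alpha,
                         fit_path=True)


class ToyCV(Toy):
    def fit(self, X, y):
        best_alpha = 0.05  # found by cross-validation in the real code
        # explicit arguments replace the old trick of setting self.alpha
        # and delegating to Toy.fit
        return self._fit(X, y, max_iter=self.max_iter, alpha=best_alpha,
                         fit_path=True)
```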


precompute = self.precompute
if not hasattr(precompute, '__array__') and (
precompute is True or
@@ -662,7 +636,7 @@ def fit(self, X, y, Xy=None):
self.n_iter_ = []
self.coef_ = np.empty((n_targets, n_features))

- if self.fit_path:
+ if fit_path:
self.active_ = []
self.coef_path_ = []
for k in xrange(n_targets):
@@ -698,9 +672,45 @@ def fit(self, X, y, Xy=None):
if n_targets == 1:
self.alphas_ = self.alphas_[0]
self.n_iter_ = self.n_iter_[0]

self._set_intercept(X_offset, y_offset, X_scale)
return self

+ def fit(self, X, y, Xy=None):
+ """Fit the model using X, y as training data.
+
+ Parameters
+ ----------
+ X : array-like, shape (n_samples, n_features)
+ Training data.
+
+ y : array-like, shape (n_samples,) or (n_samples, n_targets)
+ Target values.
+
+ Xy : array-like, shape (n_samples,) or (n_samples, n_targets), \
+ optional
+ Xy = np.dot(X.T, y) that can be precomputed. It is useful
+ only when the Gram matrix is precomputed.
+
+ Returns
+ -------
+ self : object
+ returns an instance of self.
+ """
+ X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
+
+ alpha = getattr(self, 'alpha', 0.)
+ if hasattr(self, 'n_nonzero_coefs'):
+ alpha = 0. # n_nonzero_coefs parametrization takes priority
+ max_iter = self.n_nonzero_coefs
+ else:
+ max_iter = self.max_iter
+
+ self._fit(X, y, max_iter=max_iter, alpha=alpha, fit_path=self.fit_path,
+ Xy=Xy)
+
+ return self


class LassoLars(Lars):
"""Lasso model fit with Least Angle Regression a.k.a. Lars
@@ -1145,10 +1155,13 @@ def fit(self, X, y):
# Now compute the full model
# it will call a lasso internally when self is LassoLarsCV
# as self.method == 'lasso'
- Lars.fit(self, X, y)
+ self._fit(X, y, max_iter=self.max_iter, alpha=best_alpha,
+ Xy=None, fit_path=True)
return self

@property
+ @deprecated("Attribute alpha is deprecated in 0.19 and "
+ "will be removed in 0.21. See 'alpha_' instead")
def alpha(self):
# impedance matching for the above Lars.fit (should not be documented)
return self.alpha_
@@ -1283,6 +1296,24 @@ class LassoLarsCV(LarsCV):

method = 'lasso'

+ def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
+ normalize=True, precompute='auto', cv=None,
+ max_n_alphas=1000, n_jobs=1, eps=np.finfo(np.float).eps,
+ copy_X=True, positive=False):
+ self.fit_intercept = fit_intercept
+ self.verbose = verbose
+ self.max_iter = max_iter
+ self.normalize = normalize
+ self.precompute = precompute
+ self.cv = cv
+ self.max_n_alphas = max_n_alphas
+ self.n_jobs = n_jobs
+ self.eps = eps
+ self.copy_X = copy_X
+ self.positive = positive
+ # XXX : we don't use super(LarsCV, self).__init__
+ # to avoid setting n_nonzero_coefs
Member:

Technically it tells us that we have the wrong inheritance diagram. But... I don't bother.
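
A short sketch of the failure mode this avoids (toy classes again): if the CV estimator inherited an __init__ that sets n_nonzero_coefs, the old hasattr dispatch in fit would silently discard the user's max_iter, which is exactly what test_lars_cv_max_iter below guards against:

```python
class Base(object):
    def __init__(self, n_nonzero_coefs=500):
        self.n_nonzero_coefs = n_nonzero_coefs


class CV(Base):
    def __init__(self, max_iter=500):
        super(CV, self).__init__()  # inherits n_nonzero_coefs=500
        self.max_iter = max_iter


cv = CV(max_iter=5)
# the old dispatch rule: n_nonzero_coefs takes priority when present
max_iter = (cv.n_nonzero_coefs if hasattr(cv, 'n_nonzero_coefs')
            else cv.max_iter)
print(max_iter)  # 500 -- the requested max_iter=5 was ignored
```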



class LassoLarsIC(LassoLars):
"""Lasso model fit with Lars using BIC or AIC for model selection
16 changes: 16 additions & 0 deletions sklearn/linear_model/tests/test_least_angle.py
@@ -1,10 +1,13 @@
+ import warnings

import numpy as np
from scipy import linalg

from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_false
from sklearn.utils.testing import assert_less
from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_raises
@@ -402,6 +405,19 @@ def test_lars_cv():
lars_cv.fit(X, y)
np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
old_alpha = lars_cv.alpha_
+ assert_false(hasattr(lars_cv, 'n_nonzero_coefs'))
Member:

maybe a test for the ignored max_iter?

Member Author:

done



+ def test_lars_cv_max_iter():
+ with warnings.catch_warnings(record=True) as w:
+ X = diabetes.data
+ y = diabetes.target
+ rng = np.random.RandomState(42)
+ x = rng.randn(len(y))
+ X = np.c_[X, x, x] # add correlated features
+ lars_cv = linear_model.LassoLarsCV(max_iter=5)
+ lars_cv.fit(X, y)
+ assert_true(len(w) == 0)


def test_lasso_lars_ic():