scikit-learn · MechCoder · Feb 11, 2015 · amueller · Feb 11, 2015 · MechCoder
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
@@ -9,7 +9,7 @@
 from ..utils.validation import NotFittedError, check_is_fitted
 
 
-class _LearntSelectorMixin(TransformerMixin):
+class _LearntSelectorMixin(object):
     # Note because of the extra threshold parameter in transform, this does
     # not naturally extend from SelectorMixin
     """Transformer mixin selecting features based on importance weights.
@@ -27,21 +27,24 @@ def transform(self, X, threshold=None):
 
         Parameters
         ----------
-        X : array or scipy sparse matrix of shape [n_samples, n_features]
+        X : array or scipy sparse matrix of shape (n_samples, n_features)
             The input samples.
 
-        threshold : string, float or None, optional (default=None)
-            The threshold value to use for feature selection. Features whose
-            importance is greater or equal are kept while the others are
-            discarded. If "median" (resp. "mean"), then the threshold value is
-            the median (resp. the mean) of the feature importances. A scaling
-            factor (e.g., "1.25*mean") may also be used. If None and if
-            available, the object attribute ``threshold`` is used. Otherwise,
-            "mean" is used by default.
+        threshold : string or float, optional
+            The threshold value to use for feature selection. Features
+            whose importance is greater or equal are kept while the others
+            are discarded.
+
+            1. If None, the object attribute ``threshold`` is used if set.
+            2. Otherwise, if the penalty is "l1" (or the model is a Lasso),
+               the threshold is 1e-5 times the maximum feature importance;
+               for all other models "mean" is used.
+            3. The threshold provided can be a float, "mean", "median",
+               or a scaled version of either string, such as "1.25*mean".
 
         Returns
         -------
-        X_r : array of shape [n_samples, n_selected_features]
+        X_r : array of shape (n_samples, n_selected_features)
             The input samples with only the selected features.
         """
         check_is_fitted(self, ('coef_', 'feature_importances_'), 
@@ -68,11 +71,16 @@ def transform(self, X, threshold=None):
 
         # Retrieve threshold
         if threshold is None:
-            if hasattr(self, "penalty") and self.penalty == "l1":
+            threshold = getattr(self, "threshold", None)
+
+        if threshold is None:
+            # Lasso uses an l1 penalty but has no ``penalty`` parameter.
+            if (hasattr(self, "penalty") and self.penalty == "l1" or
+                'Lasso' in self.__class__.__name__):
                 # the natural default threshold is 0 when l1 penalty was used
-                threshold = getattr(self, "threshold", 1e-5)
+                threshold = 1e-5 * np.max(importances)
             else:
-                threshold = getattr(self, "threshold", "mean")
+                threshold = "mean"
 
         if isinstance(threshold, six.string_types):
             if "*" in threshold:

diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
@@ -5,10 +5,11 @@
 
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_greater
+from sklearn.utils.testing import assert_array_equal
 
 from sklearn.datasets import load_iris
-from sklearn.linear_model import LogisticRegression
-from sklearn.linear_model import SGDClassifier
+from sklearn.linear_model import (
+    LogisticRegression, SGDClassifier, Lasso, LassoCV)
 from sklearn.svm import LinearSVC
 
 iris = load_iris()
@@ -41,3 +42,17 @@ def test_invalid_input():
     clf.fit(iris.data, iris.target)
     assert_raises(ValueError, clf.transform, iris.data, "gobbledigook")
     assert_raises(ValueError, clf.transform, iris.data, ".5 * gobbledigook")
+
+
+def test_transform_elastic_net_lasso():
+    """Test that default threshold in Lasso Models ~= 0"""
+    rng = np.random.RandomState(0)
+    X, y = rng.rand(5, 5), rng.rand(5)
+    coef = rng.rand(5)
+    coef[[1, 2]] = 0.0
+    for model in [LassoCV(), Lasso()]:
+        model.fit(X, y)
+
+        # Rewrite coefficients just to check that transform works.
+        model.coef_ = coef
+        assert_array_equal(model.transform(X), X[:, [0, 3, 4]])
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
@@ -25,6 +25,7 @@
 from ..externals import six
 from ..externals.joblib import Parallel, delayed
 from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils import as_float_array, check_array
 from ..utils.extmath import safe_sparse_dot
 from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
@@ -113,7 +114,8 @@ def center_data(X, y, fit_intercept, normalize=False, copy=True,
     return X, y, X_mean, y_mean, X_std
 
 
-class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator)):
+class LinearModel(six.with_metaclass(ABCMeta, BaseEstimator),
+                  _LearntSelectorMixin):
     """Base class for Linear Models"""
 
     @abstractmethod

diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py
@@ -21,6 +21,7 @@
 from ..externals.joblib import Parallel, delayed
 from ..externals import six
 from ..externals.six.moves import xrange
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
 from ..utils import ConvergenceWarning
@@ -935,7 +936,7 @@ def _path_residuals(X, y, train, test, path, path_params, alphas=None,
     return this_mses
 
 
-class LinearModelCV(six.with_metaclass(ABCMeta, LinearModel)):
+class LinearModelCV(six.with_metaclass(ABCMeta, LinearModel), _LearntSelectorMixin):
     """Base class for iterative model fitting along a regularization path"""
 
     @abstractmethod