[WIP] Ensure delegated ducktyping in MetaEstimators #2018

Closed
wants to merge 2 commits into from
3 changes: 3 additions & 0 deletions doc/whats_new.rst
@@ -88,6 +88,9 @@ Changelog
- A bug that caused :class:`ensemble.AdaBoostClassifier` to output
incorrect probabilities has been fixed.

- Feature selectors now share a mixin providing consistent `transform`,
`inverse_transform` and `get_support` methods. By `Joel Nothman`_.


API changes summary
-------------------
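A minimal sketch of the shared selector interface the changelog entry above describes, using `SelectKBest` purely as an example of an existing selector (the specific classes converted to the new mixin in this PR may differ; shapes in the comments assume the iris data):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import SelectKBest, f_classif

    iris = load_iris()
    X, y = iris.data, iris.target

    selector = SelectKBest(f_classif, k=2).fit(X, y)
    selector.get_support()               # boolean mask over the 4 input features
    selector.get_support(indices=True)   # integer indices of the 2 kept features
    X_r = selector.transform(X)          # shape (150, 2)
    selector.inverse_transform(X_r)      # shape (150, 4), zeros in dropped columns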
5 changes: 3 additions & 2 deletions sklearn/ensemble/forest.py
@@ -46,7 +46,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
from ..externals.joblib import Parallel, delayed, cpu_count
from ..externals import six
from ..externals.six.moves import xrange
from ..feature_selection.selector_mixin import SelectorMixin
from ..feature_selection.from_model import _LearntSelectorMixin
from ..metrics import r2_score
from ..preprocessing import OneHotEncoder
from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
@@ -207,7 +207,8 @@ def _partition_features(forest, n_total_features):
return n_jobs, n_features, starts


class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble, SelectorMixin)):
class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
_LearntSelectorMixin)):
"""Base class for forests of trees.

Warning: This class should not be used directly. Use derived classes
118 changes: 118 additions & 0 deletions sklearn/feature_selection/base.py
@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""Generic feature selection mixin"""

# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman
# License: BSD 3 clause

from abc import ABCMeta, abstractmethod

import numpy as np
from scipy.sparse import issparse, csc_matrix

from ..base import TransformerMixin
from ..utils import array2d, atleast2d_or_csr, safe_mask
from ..externals import six


class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)):
"""
Transformer mixin that performs feature selection given a support mask

This mixin provides a feature selector implementation with `transform` and
`inverse_transform` functionality given an implementation of
`_get_support_mask`.
"""

def get_support(self, indices=False):
"""
Get a mask, or integer index, of the features selected

Parameters
----------
indices : boolean (default False)
If True, the return value will be an array of integers, rather
than a boolean mask.

Returns
-------
support : array
An index that selects the retained features from a feature vector.
If `indices` is False, this is a boolean array of shape
[# input features], in which an element is True iff its
corresponding feature is selected for retention. If `indices` is
True, this is an integer array of shape [# output features] whose
values are indices into the input feature vector.
"""
mask = self._get_support_mask()
return mask if not indices else np.where(mask)[0]

@abstractmethod
def _get_support_mask(self):
"""
Get the boolean mask indicating which features are selected

Returns
-------
support : boolean array of shape [# input features]
An element is True iff its corresponding feature is selected for
retention.
"""

def transform(self, X):
"""Reduce X to the selected features.

Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.

Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
X = atleast2d_or_csr(X)
mask = self.get_support()
if len(mask) != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
return X[:, safe_mask(X, mask)]

def inverse_transform(self, X):
"""
Reverse the transformation operation

Parameters
----------
X : array of shape [n_samples, n_selected_features]
The input samples.

Returns
-------
X_r : array of shape [n_samples, n_original_features]
`X` with columns of zeros inserted where features would have
been removed by `transform`.
"""
if issparse(X):
X = X.tocsc()
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
return Xt

support = self.get_support()
X = array2d(X)
if support.sum() != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")

if X.ndim == 1:
X = X[None, :]
Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
Xt[:, support] = X
return Xt
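A minimal sketch of how a selector built on the new `SelectorMixin` only has to implement `_get_support_mask` to obtain `get_support`, `transform` and `inverse_transform`; the import path follows the module added in this diff, and `FirstKSelector` is a hypothetical toy class, not part of the PR:

    import numpy as np
    from sklearn.base import BaseEstimator
    from sklearn.feature_selection.base import SelectorMixin

    class FirstKSelector(BaseEstimator, SelectorMixin):
        """Toy selector that simply keeps the first k columns."""
        def __init__(self, k=2):
            self.k = k

        def fit(self, X, y=None):
            # Only remember how many features were seen during fit.
            self.n_features_ = X.shape[1]
            return self

        def _get_support_mask(self):
            # The mixin derives transform/inverse_transform from this mask.
            mask = np.zeros(self.n_features_, dtype=bool)
            mask[:self.k] = True
            return mask

    X = np.arange(12).reshape(3, 4)
    sel = FirstKSelector(k=2).fit(X)
    sel.transform(X)                          # keeps columns 0 and 1
    sel.get_support(indices=True)             # array([0, 1])
    sel.inverse_transform(sel.transform(X))   # zero-padded back to 4 columns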

110 changes: 110 additions & 0 deletions sklearn/feature_selection/from_model.py
@@ -0,0 +1,110 @@
# Authors: Gilles Louppe, Mathieu Blondel
# License: BSD 3 clause

import numpy as np

from ..base import TransformerMixin
from ..externals import six
from ..utils import safe_mask, atleast2d_or_csc


class _LearntSelectorMixin(TransformerMixin):
# Note: because of the extra threshold parameter in transform, this does
# not naturally extend from SelectorMixin
"""Transformer mixin selecting features based on importance weights.

This mixin can be used with any estimator that exposes a
``feature_importances_`` or ``coef_`` attribute to evaluate the relative
importance of individual features for feature selection.
"""
def transform(self, X, threshold=None):
"""Reduce X to its most important features.

Parameters
----------
X : array or scipy sparse matrix of shape [n_samples, n_features]
The input samples.

threshold : string, float or None, optional (default=None)
The threshold value to use for feature selection. Features whose
importance is greater than or equal to the threshold are kept while the
others are discarded. If "median" (resp. "mean"), then the threshold value is
the median (resp. the mean) of the feature importances. A scaling
factor (e.g., "1.25*mean") may also be used. If None and if
available, the object attribute ``threshold`` is used. Otherwise,
"mean" is used by default.

Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
X = atleast2d_or_csc(X)
# Retrieve importance vector
if hasattr(self, "feature_importances_"):
importances = self.feature_importances_
if importances is None:
raise ValueError("Importance weights not computed. Please set"
" the compute_importances parameter before "
"fit.")

elif hasattr(self, "coef_"):
if self.coef_.ndim == 1:
importances = np.abs(self.coef_)

else:
importances = np.sum(np.abs(self.coef_), axis=0)

else:
raise ValueError("Missing `feature_importances_` or `coef_`"
" attribute, did you forget to set the "
"estimator's parameter to compute it?")
if len(importances) != X.shape[1]:
raise ValueError("X has different number of features than"
" during model fitting.")

# Retrieve threshold
if threshold is None:
if hasattr(self, "penalty") and self.penalty == "l1":
# the natural default threshold is 0 when l1 penalty was used
threshold = getattr(self, "threshold", 1e-5)
else:
threshold = getattr(self, "threshold", "mean")

if isinstance(threshold, six.string_types):
if "*" in threshold:
scale, reference = threshold.split("*")
scale = float(scale.strip())
reference = reference.strip()

if reference == "median":
reference = np.median(importances)
elif reference == "mean":
reference = np.mean(importances)
else:
raise ValueError("Unknown reference: " + reference)

threshold = scale * reference

elif threshold == "median":
threshold = np.median(importances)

elif threshold == "mean":
threshold = np.mean(importances)

else:
threshold = float(threshold)

# Selection
try:
mask = importances >= threshold
except TypeError:
# Fails in Python 3.x when threshold is str;
# result is array of True
raise ValueError("Invalid threshold: all features are discarded.")

if np.any(mask):
mask = safe_mask(X, mask)
return X[:, mask]
else:
raise ValueError("Invalid threshold: all features are discarded.")
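A small numeric sketch of how the threshold strings documented in ``transform`` above are resolved; this is plain NumPy arithmetic, independent of any estimator:

    import numpy as np

    importances = np.array([0.05, 0.10, 0.30, 0.55])

    # "mean": keep features whose importance is >= the mean importance
    importances >= importances.mean()           # [False, False,  True,  True]

    # "1.25*mean": a scaling factor is applied to the reference value
    importances >= 1.25 * importances.mean()    # [False, False, False,  True]

    # "median": use the median of the importances as the reference
    importances >= np.median(importances)       # [False, False,  True,  True]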
22 changes: 5 additions & 17 deletions sklearn/feature_selection/rfe.py
@@ -7,15 +7,16 @@
"""Recursive feature elimination for feature ranking"""

import numpy as np
from ..utils import check_arrays, safe_sqr, safe_mask, atleast2d_or_csc
from ..utils import check_arrays, safe_sqr
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..base import clone
from ..base import is_classifier
from ..cross_validation import check_cv
from .base import SelectorMixin


class RFE(BaseEstimator, MetaEstimatorMixin):
class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin):
"""Feature ranking with recursive feature elimination.

Given an external estimator that assigns weights to features (e.g., the
@@ -196,21 +197,8 @@ def score(self, X, y):
"""
return self.estimator_.score(self.transform(X), y)

def transform(self, X):
"""Reduce X to the selected features during the elimination.

Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.

Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the features selected during the \
elimination.
"""
return atleast2d_or_csc(X)[:, safe_mask(X, self.support_)]
def _get_support_mask(self):
return self.support_

def decision_function(self, X):
return self.estimator_.decision_function(self.transform(X))