From 53350ede8ef3ecd250d152f3d2cf535cd22c7ecc Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Mon, 6 May 2013 20:36:16 +1000
Subject: [PATCH 1/2] ENH Create FeatureSelectionMixin for shared
 [inverse_]transform code

Also rename FeatureSelectionMixin -> SelectorMixin -> _LearntSelectorMixin
And rename sklearn.feature_selection.{selector_mixin -> from_model}
---
 sklearn/ensemble/forest.py                    |   5 +-
 sklearn/feature_selection/base.py             | 118 ++++++++++++++++++
 sklearn/feature_selection/from_model.py       | 110 ++++++++++++++++
 sklearn/feature_selection/rfe.py              |  22 +---
 sklearn/feature_selection/selector_mixin.py   | 113 ++---------------
 sklearn/feature_selection/tests/test_base.py  | 117 +++++++++++++++++
 ...t_selector_mixin.py => test_from_model.py} |   0
 .../feature_selection/univariate_selection.py |  56 +--------
 sklearn/linear_model/logistic.py              |   6 +-
 sklearn/linear_model/perceptron.py            |   4 +-
 sklearn/linear_model/stochastic_gradient.py   |   6 +-
 sklearn/svm/classes.py                        |   4 +-
 sklearn/tree/tree.py                          |   4 +-
 13 files changed, 378 insertions(+), 187 deletions(-)
 create mode 100644 sklearn/feature_selection/base.py
 create mode 100644 sklearn/feature_selection/from_model.py
 create mode 100644 sklearn/feature_selection/tests/test_base.py
 rename sklearn/feature_selection/tests/{test_selector_mixin.py => test_from_model.py} (100%)

diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py
index 1bea9a51642d4..ea4dd87d5850d 100644
--- a/sklearn/ensemble/forest.py
+++ b/sklearn/ensemble/forest.py
@@ -46,7 +46,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 from ..externals.joblib import Parallel, delayed, cpu_count
 from ..externals import six
 from ..externals.six.moves import xrange
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..metrics import r2_score
 from ..preprocessing import OneHotEncoder
 from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
@@ -207,7 +207,8 @@ def _partition_features(forest, n_total_features):
     return n_jobs, n_features, starts
 
 
-class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble, SelectorMixin)):
+class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
+                                    _LearntSelectorMixin)):
     """Base class for forests of trees.
 
     Warning: This class should not be used directly. Use derived classes
diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py
new file mode 100644
index 0000000000000..86dbee6209d58
--- /dev/null
+++ b/sklearn/feature_selection/base.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+"""Generic feature selection mixin"""
+
+# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman
+# License: BSD 3 clause
+
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from scipy.sparse import issparse, csc_matrix
+
+from ..base import TransformerMixin
+from ..utils import array2d, atleast2d_or_csr, safe_mask
+from ..externals import six
+
+
+class SelectorMixin(six.with_metaclass(ABCMeta, TransformerMixin)):
+    """
+    Transformer mixin that performs feature selection given a support mask
+
+    This mixin provides a feature selector implementation with `transform` and
+    `inverse_transform` functionality given an implementation of
+    `_get_support_mask`.
+    """
+
+    def get_support(self, indices=False):
+        """
+        Get a mask, or integer index, of the features selected
+
+        Parameters
+        ----------
+        indices : boolean (default False)
+            If True, the return value will be an array of integers, rather
+            than a boolean mask.
+
+        Returns
+        -------
+        support : array
+            An index that selects the retained features from a feature vector.
+            If `indices` is False, this is a boolean array of shape
+            [# input features], in which an element is True iff its
+            corresponding feature is selected for retention. If `indices` is
+            True, this is an integer array of shape [# output features] whose
+            values are indices into the input feature vector.
+        """
+        mask = self._get_support_mask()
+        return mask if not indices else np.where(mask)[0]
+
+    @abstractmethod
+    def _get_support_mask(self):
+        """
+        Get the boolean mask indicating which features are selected
+
+        Returns
+        -------
+        support : boolean array of shape [# input features]
+            An element is True iff its corresponding feature is selected for
+            retention.
+        """
+
+    def transform(self, X):
+        """Reduce X to the selected features.
+
+        Parameters
+        ----------
+        X : array of shape [n_samples, n_features]
+            The input samples.
+
+        Returns
+        -------
+        X_r : array of shape [n_samples, n_selected_features]
+            The input samples with only the selected features.
+        """
+        X = atleast2d_or_csr(X)
+        mask = self.get_support()
+        if len(mask) != X.shape[1]:
+            raise ValueError("X has a different shape than during fitting.")
+        return X[:, safe_mask(X, mask)]
+
+    def inverse_transform(self, X):
+        """
+        Reverse the transformation operation
+
+        Parameters
+        ----------
+        X : array of shape [n_samples, n_selected_features]
+            The input samples.
+
+        Returns
+        -------
+        X_r : array of shape [n_samples, n_original_features]
+            `X` with columns of zeros inserted where features would have
+            been removed by `transform`.
+        """
+        if issparse(X):
+            X = X.tocsc()
+            # insert additional entries in indptr:
+            # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
+            # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
+            col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
+            indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
+            Xt = csc_matrix((X.data, X.indices, indptr),
+                            shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
+            return Xt
+
+        support = self.get_support()
+        X = array2d(X)
+        if support.sum() != X.shape[1]:
+            raise ValueError("X has a different shape than during fitting.")
+
+        if X.ndim == 1:
+            X = X[None, :]
+        Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
+        Xt[:, support] = X
+        return Xt
+
diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
new file mode 100644
index 0000000000000..a0f7759c06312
--- /dev/null
+++ b/sklearn/feature_selection/from_model.py
@@ -0,0 +1,110 @@
+# Authors: Gilles Louppe, Mathieu Blondel
+# License: BSD 3 clause
+
+import numpy as np
+
+from ..base import TransformerMixin
+from ..externals import six
+from ..utils import safe_mask, atleast2d_or_csc
+
+
+class _LearntSelectorMixin(TransformerMixin):
+    # Note: because of the extra threshold parameter in transform, this does
+    # not naturally extend from SelectorMixin.
+    """Transformer mixin selecting features based on importance weights.
+
+    This implementation can be mixed in with any estimator that exposes a
+    ``feature_importances_`` or ``coef_`` attribute to evaluate the relative
+    importance of individual features for feature selection.
+    """
+    def transform(self, X, threshold=None):
+        """Reduce X to its most important features.
+
+        Parameters
+        ----------
+        X : array or scipy sparse matrix of shape [n_samples, n_features]
+            The input samples.
+
+        threshold : string, float or None, optional (default=None)
+            The threshold value to use for feature selection. Features whose
+            importance is greater than or equal to the threshold are kept
+            while the others are discarded. If "median" (resp. "mean"), then
+            the threshold value is the median (resp. the mean) of the feature
+            importances. A scaling factor (e.g., "1.25*mean") may also be
+            used. If None and if available, the object attribute
+            ``threshold`` is used. Otherwise, "mean" is used by default.
+
+        Returns
+        -------
+        X_r : array of shape [n_samples, n_selected_features]
+            The input samples with only the selected features.
+        """
+        X = atleast2d_or_csc(X)
+        # Retrieve importance vector
+        if hasattr(self, "feature_importances_"):
+            importances = self.feature_importances_
+            if importances is None:
+                raise ValueError("Importance weights not computed. Please set"
+                                 " the compute_importances parameter before "
+                                 "fit.")
+
+        elif hasattr(self, "coef_"):
+            if self.coef_.ndim == 1:
+                importances = np.abs(self.coef_)
+
+            else:
+                importances = np.sum(np.abs(self.coef_), axis=0)
+
+        else:
+            raise ValueError("Missing `feature_importances_` or `coef_`"
+                             " attribute, did you forget to set the "
+                             "estimator's parameter to compute it?")
+        if len(importances) != X.shape[1]:
+            raise ValueError("X has a different number of features than"
+                             " during model fitting.")
+
+        # Retrieve threshold
+        if threshold is None:
+            if hasattr(self, "penalty") and self.penalty == "l1":
+                # the natural default threshold is 0 when l1 penalty was used
+                threshold = getattr(self, "threshold", 1e-5)
+            else:
+                threshold = getattr(self, "threshold", "mean")
+
+        if isinstance(threshold, six.string_types):
+            if "*" in threshold:
+                scale, reference = threshold.split("*")
+                scale = float(scale.strip())
+                reference = reference.strip()
+
+                if reference == "median":
+                    reference = np.median(importances)
+                elif reference == "mean":
+                    reference = np.mean(importances)
+                else:
+                    raise ValueError("Unknown reference: " + reference)
+
+                threshold = scale * reference
+
+            elif threshold == "median":
+                threshold = np.median(importances)
+
+            elif threshold == "mean":
+                threshold = np.mean(importances)
+
+        else:
+            threshold = float(threshold)
+
+        # Selection
+        try:
+            mask = importances >= threshold
+        except TypeError:
+            # Fails in Python 3.x when threshold is str;
+            # result is array of True
+            raise ValueError("Invalid threshold: all features are discarded.")
+
+        if np.any(mask):
+            mask = safe_mask(X, mask)
+            return X[:, mask]
+        else:
+            raise ValueError("Invalid threshold: all features are discarded.")
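
The threshold semantics above can be exercised from any estimator that mixes
this in. A rough usage sketch (the dataset and estimator choice below are
illustrative only, not part of this patch):

    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=100, n_features=20, random_state=0)

    # LinearSVC gains transform() from _LearntSelectorMixin via its coef_.
    clf = LinearSVC(penalty="l1", dual=False, C=0.1).fit(X, y)

    print(clf.transform(X).shape)                         # l1 penalty: default threshold near 0
    print(clf.transform(X, threshold="1.25*mean").shape)  # scaled threshold
    print(clf.transform(X, threshold="median").shape)     # median of importances
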
diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py
index 0bec9c7fe31d2..ff1c855a3b948 100644
--- a/sklearn/feature_selection/rfe.py
+++ b/sklearn/feature_selection/rfe.py
@@ -7,15 +7,16 @@
 """Recursive feature elimination for feature ranking"""
 
 import numpy as np
-from ..utils import check_arrays, safe_sqr, safe_mask, atleast2d_or_csc
+from ..utils import check_arrays, safe_sqr
 from ..base import BaseEstimator
 from ..base import MetaEstimatorMixin
 from ..base import clone
 from ..base import is_classifier
 from ..cross_validation import check_cv
+from .base import SelectorMixin
 
 
-class RFE(BaseEstimator, MetaEstimatorMixin):
+class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin):
     """Feature ranking with recursive feature elimination.
 
     Given an external estimator that assigns weights to features (e.g., the
@@ -196,21 +197,8 @@ def score(self, X, y):
         """
         return self.estimator_.score(self.transform(X), y)
 
-    def transform(self, X):
-        """Reduce X to the selected features during the elimination.
-
-        Parameters
-        ----------
-        X : array of shape [n_samples, n_features]
-            The input samples.
-
-        Returns
-        -------
-        X_r : array of shape [n_samples, n_selected_features]
-            The input samples with only the features selected during the \
-            elimination.
-        """
-        return atleast2d_or_csc(X)[:, safe_mask(X, self.support_)]
+    def _get_support_mask(self):
+        return self.support_
 
     def decision_function(self, X):
         return self.estimator_.decision_function(self.transform(X))
diff --git a/sklearn/feature_selection/selector_mixin.py b/sklearn/feature_selection/selector_mixin.py
index b616f82d0694f..dcb7ed8b79840 100644
--- a/sklearn/feature_selection/selector_mixin.py
+++ b/sklearn/feature_selection/selector_mixin.py
@@ -1,108 +1,11 @@
-# Authors: Gilles Louppe, Mathieu Blondel
-# License: BSD 3 clause
+import warnings
 
-import numpy as np
+warnings.warn("sklearn.feature_selection.selector_mixin.SelectorMixin "
+              "has been renamed "
+              "sklearn.feature_selection.from_model._LearntSelectorMixin, "
+              "and this alias will be removed in version 0.16",
+              DeprecationWarning)
 
-from ..base import TransformerMixin
-from ..externals import six
-from ..utils import safe_mask, atleast2d_or_csc
+from .from_model import _LearntSelectorMixin as SelectorMixin
 
-
-class SelectorMixin(TransformerMixin):
-    """Transformer mixin selecting features based on importance weights.
-
-    This implementation can be mixin on any estimator that exposes a
-    ``feature_importances_`` or ``coef_`` attribute to evaluate the relative
-    importance of individual features for feature selection.
-    """
-    def transform(self, X, threshold=None):
-        """Reduce X to its most important features.
-
-        Parameters
-        ----------
-        X : array or scipy sparse matrix of shape [n_samples, n_features]
-            The input samples.
-
-        threshold : string, float or None, optional (default=None)
-            The threshold value to use for feature selection. Features whose
-            importance is greater or equal are kept while the others are
-            discarded. If "median" (resp. "mean"), then the threshold value is
-            the median (resp. the mean) of the feature importances. A scaling
-            factor (e.g., "1.25*mean") may also be used. If None and if
-            available, the object attribute ``threshold`` is used. Otherwise,
-            "mean" is used by default.
-
-        Returns
-        -------
-        X_r : array of shape [n_samples, n_selected_features]
-            The input samples with only the selected features.
-        """
-        X = atleast2d_or_csc(X)
-        # Retrieve importance vector
-        if hasattr(self, "feature_importances_"):
-            importances = self.feature_importances_
-            if importances is None:
-                raise ValueError("Importance weights not computed. Please set"
-                                 " the compute_importances parameter before "
-                                 "fit.")
-
-        elif hasattr(self, "coef_"):
-            if self.coef_.ndim == 1:
-                importances = np.abs(self.coef_)
-
-            else:
-                importances = np.sum(np.abs(self.coef_), axis=0)
-
-        else:
-            raise ValueError("Missing `feature_importances_` or `coef_`"
-                             " attribute, did you forget to set the "
-                             "estimator's parameter to compute it?")
-        if len(importances) != X.shape[1]:
-            raise ValueError("X has different number of features than"
-                             " during model fitting.")
-
-        # Retrieve threshold
-        if threshold is None:
-            if hasattr(self, "penalty") and self.penalty == "l1":
-                # the natural default threshold is 0 when l1 penalty was used
-                threshold = getattr(self, "threshold", 1e-5)
-            else:
-                threshold = getattr(self, "threshold", "mean")
-
-        if isinstance(threshold, six.string_types):
-            if "*" in threshold:
-                scale, reference = threshold.split("*")
-                scale = float(scale.strip())
-                reference = reference.strip()
-
-                if reference == "median":
-                    reference = np.median(importances)
-                elif reference == "mean":
-                    reference = np.mean(importances)
-                else:
-                    raise ValueError("Unknown reference: " + reference)
-
-                threshold = scale * reference
-
-            elif threshold == "median":
-                threshold = np.median(importances)
-
-            elif threshold == "mean":
-                threshold = np.mean(importances)
-
-        else:
-            threshold = float(threshold)
-
-        # Selection
-        try:
-            mask = importances >= threshold
-        except TypeError:
-            # Fails in Python 3.x when threshold is str;
-            # result is array of True
-            raise ValueError("Invalid threshold: all features are discarded.")
-
-        if np.any(mask):
-            mask = safe_mask(X, mask)
-            return X[:, mask]
-        else:
-            raise ValueError("Invalid threshold: all features are discarded.")
+__all__ = ['SelectorMixin']
diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py
new file mode 100644
index 0000000000000..801e0672cfabe
--- /dev/null
+++ b/sklearn/feature_selection/tests/test_base.py
@@ -0,0 +1,117 @@
+import string
+
+import numpy as np
+from scipy import sparse as sp
+
+from nose.tools import assert_raises, assert_equal
+from numpy.testing import assert_array_equal
+
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection.base import SelectorMixin
+from sklearn.utils import atleast2d_or_csc
+
+
+class StepSelector(SelectorMixin, BaseEstimator):
+    """Retain every `step`-th feature (beginning with 0)"""
+    def __init__(self, step=2):
+        self.step = step
+
+    def fit(self, X, y=None):
+        X = atleast2d_or_csc(X)
+        self.n_input_feats = X.shape[1]
+        return self
+
+    def _get_support_mask(self):
+        mask = np.zeros(self.n_input_feats, dtype=bool)
+        mask[::self.step] = True
+        return mask
+
+
+support = [True, False] * 5
+support_inds = [0, 2, 4, 6, 8]
+X = np.arange(20).reshape(2, 10)
+Xt = np.arange(0, 20, 2).reshape(2, 5)
+Xinv = X.copy()
+Xinv[:, 1::2] = 0
+y = [0, 1]
+feature_names = list(string.uppercase[:10])
+feature_names_t = feature_names[::2]
+feature_names_inv = np.array(feature_names)
+feature_names_inv[1::2] = ''
+
+
+def test_transform_dense():
+    sel = StepSelector()
+    Xt_actual = sel.fit(X, y).transform(X)
+    Xt_actual2 = StepSelector().fit_transform(X, y)
+    assert_array_equal(Xt, Xt_actual)
+    assert_array_equal(Xt, Xt_actual2)
+
+    # Check dtype matches
+    assert_equal(np.int32, sel.transform(X.astype(np.int32)).dtype)
+    assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype)
+
+    # Check 1d list and other dtype:
+    names_t_actual = sel.transform(feature_names)
+    assert_array_equal(feature_names_t, names_t_actual.ravel())
+
+    # Check wrong shape raises error
+    assert_raises(ValueError, sel.transform, np.array([[1], [2]]))
+
+
+def test_transform_sparse():
+    sparse = sp.csc_matrix
+    sel = StepSelector()
+    Xt_actual = sel.fit(sparse(X)).transform(sparse(X))
+    Xt_actual2 = sel.fit_transform(sparse(X))
+    assert_array_equal(Xt, Xt_actual.todense())
+    assert_array_equal(Xt, Xt_actual2.todense())
+
+    # Check dtype matches
+    assert_equal(np.int32, sel.transform(sparse(X).astype(np.int32)).dtype)
+    assert_equal(np.float32, sel.transform(sparse(X).astype(np.float32)).dtype)
+
+    # Check wrong shape raises error
+    assert_raises(ValueError, sel.transform, np.array([[1], [2]]))
+
+
+def test_inverse_transform_dense():
+    sel = StepSelector()
+    Xinv_actual = sel.fit(X, y).inverse_transform(Xt)
+    assert_array_equal(Xinv, Xinv_actual)
+
+    # Check dtype matches
+    assert_equal(np.int32,
+                 sel.inverse_transform(Xt.astype(np.int32)).dtype)
+    assert_equal(np.float32,
+                 sel.inverse_transform(Xt.astype(np.float32)).dtype)
+
+    # Check 1d list and other dtype:
+    names_inv_actual = sel.inverse_transform(feature_names_t)
+    assert_array_equal(feature_names_inv, names_inv_actual.ravel())
+
+    # Check wrong shape raises error
+    assert_raises(ValueError, sel.inverse_transform, np.array([[1], [2]]))
+
+
+def test_inverse_transform_sparse():
+    sparse = sp.csc_matrix
+    sel = StepSelector()
+    Xinv_actual = sel.fit(sparse(X)).inverse_transform(sparse(Xt))
+    assert_array_equal(Xinv, Xinv_actual.todense())
+
+    # Check dtype matches
+    assert_equal(np.int32,
+                 sel.inverse_transform(sparse(Xt).astype(np.int32)).dtype)
+    assert_equal(np.float32,
+                 sel.inverse_transform(sparse(Xt).astype(np.float32)).dtype)
+
+    # Check wrong shape raises error
+    assert_raises(ValueError, sel.inverse_transform, np.array([[1], [2]]))
+
+
+def test_get_support():
+    sel = StepSelector()
+    sel.fit(X, y)
+    assert_array_equal(support, sel.get_support())
+    assert_array_equal(support_inds, sel.get_support(indices=True))
diff --git a/sklearn/feature_selection/tests/test_selector_mixin.py b/sklearn/feature_selection/tests/test_from_model.py
similarity index 100%
rename from sklearn/feature_selection/tests/test_selector_mixin.py
rename to sklearn/feature_selection/tests/test_from_model.py
diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py
index 6d01dbcaf1560..bae08c3c9ff52 100644
--- a/sklearn/feature_selection/univariate_selection.py
+++ b/sklearn/feature_selection/univariate_selection.py
@@ -12,15 +12,16 @@
 
 import numpy as np
 from scipy import stats
-from scipy.sparse import issparse, csc_matrix
+from scipy.sparse import issparse
 
-from ..base import BaseEstimator, TransformerMixin
+from ..base import BaseEstimator
 from ..preprocessing import LabelBinarizer
-from ..utils import (array2d, as_float_array, atleast2d_or_csc,
+from ..utils import (array2d, as_float_array,
                      atleast2d_or_csr, check_arrays, safe_asarray,
                      safe_sqr, safe_mask)
 from ..utils.extmath import safe_sparse_dot
 from ..externals import six
+from .base import SelectorMixin
 
 
 def _clean_nans(scores):
@@ -248,7 +249,7 @@ def f_regression(X, y, center=True):
 
 # Base classes
 class _BaseFilter(six.with_metaclass(ABCMeta, BaseEstimator,
-                                     TransformerMixin)):
+                                     SelectorMixin)):
 
     def __init__(self, score_func):
         """
        Initialize the univariate feature selection.
@@ -269,53 +270,6 @@ def __init__(self, score_func):
     def fit(self, X, y):
         """Run score function on (X, y) and get the appropriate features."""
 
-    def get_support(self, indices=False):
-        """
-        Return a mask, or list, of the features/indices selected.
-        """
-        mask = self._get_support_mask()
-        return mask if not indices else np.where(mask)[0]
-
-    @abstractmethod
-    def _get_support_mask(self):
-        """
-        Must return a boolean mask indicating which features are selected.
-        """
-
-    def transform(self, X):
-        """
-        Transform a new matrix using the selected features
-        """
-        X = atleast2d_or_csc(X)
-        mask = self._get_support_mask()
-        if len(mask) != X.shape[1]:
-            raise ValueError("X has a different shape than during fitting.")
-        return X[:, safe_mask(X, mask)]
-
-    def inverse_transform(self, X):
-        """
-        Reverse the transformation operation
-
-        Returns `X` with columns of zeros inserted where features would have
-        been removed by `transform`.
-        """
-        support_ = self.get_support()
-        if issparse(X):
-            X = X.tocsc()
-            # insert additional entries in indptr:
-            # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
-            # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
-            col_nonzeros = self.inverse_transform(np.diff(X.indptr)).ravel()
-            indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
-            Xt = csc_matrix((X.data, X.indices, indptr),
-                            shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
-            return Xt
-        if X.ndim == 1:
-            X = X[None, :]
-        Xt = np.zeros((X.shape[0], support_.size), dtype=X.dtype)
-        Xt[:, support_] = X
-        return Xt
-
 
 class _PvalueFilter(_BaseFilter):
     def fit(self, X, y):
diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py
index 74abcf7c8a57a..6e186360fbe8e 100644
--- a/sklearn/linear_model/logistic.py
+++ b/sklearn/linear_model/logistic.py
@@ -1,12 +1,12 @@
 import numpy as np
 
 from .base import LinearClassifierMixin, SparseCoefMixin
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..svm.base import BaseLibLinear
 
 
-class LogisticRegression(BaseLibLinear, LinearClassifierMixin, SelectorMixin,
-                         SparseCoefMixin):
+class LogisticRegression(BaseLibLinear, LinearClassifierMixin,
+                         _LearntSelectorMixin, SparseCoefMixin):
     """Logistic Regression (aka logit, MaxEnt) classifier.
 
     In the multiclass case, the training algorithm uses a one-vs.-all (OvA)
diff --git a/sklearn/linear_model/perceptron.py b/sklearn/linear_model/perceptron.py
index 593255975d2d7..979a4b3841792 100644
--- a/sklearn/linear_model/perceptron.py
+++ b/sklearn/linear_model/perceptron.py
@@ -2,10 +2,10 @@
 # License: BSD 3 clause
 
 from .stochastic_gradient import BaseSGDClassifier
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 
 
-class Perceptron(BaseSGDClassifier, SelectorMixin):
+class Perceptron(BaseSGDClassifier, _LearntSelectorMixin):
     """Perceptron
 
     Parameters
diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index df6e4d1afa860..84e68628e00f6 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -14,7 +14,7 @@
 
 from .base import LinearClassifierMixin, SparseCoefMixin
 from ..base import BaseEstimator, RegressorMixin
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils import array2d, atleast2d_or_csr, check_arrays, deprecated
 from ..utils.extmath import safe_sparse_dot
 from ..externals import six
@@ -524,7 +524,7 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
                          sample_weight=sample_weight)
 
 
-class SGDClassifier(BaseSGDClassifier, SelectorMixin):
+class SGDClassifier(BaseSGDClassifier, _LearntSelectorMixin):
     """Linear model fitted by minimizing a regularized empirical loss with SGD.
 
     SGD stands for Stochastic Gradient Descent: the gradient of the loss is
@@ -937,7 +937,7 @@ def _fit_regressor(self, X, y, alpha, C, loss, learning_rate,
         self.intercept_ = np.atleast_1d(intercept)
 
 
-class SGDRegressor(BaseSGDRegressor, SelectorMixin):
+class SGDRegressor(BaseSGDRegressor, _LearntSelectorMixin):
     """Linear model fitted by minimizing a regularized empirical loss with SGD
 
     SGD stands for Stochastic Gradient Descent: the gradient of the loss is
diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py
index 9fa79a479c1db..4de7d181ab92a 100644
--- a/sklearn/svm/classes.py
+++ b/sklearn/svm/classes.py
@@ -1,10 +1,10 @@
 from .base import BaseLibLinear, BaseSVC, BaseLibSVM
 from ..base import RegressorMixin
 from ..linear_model.base import LinearClassifierMixin, SparseCoefMixin
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 
 
-class LinearSVC(BaseLibLinear, LinearClassifierMixin, SelectorMixin,
+class LinearSVC(BaseLibLinear, LinearClassifierMixin, _LearntSelectorMixin,
                 SparseCoefMixin):
     """Linear Support Vector Classification.
 
diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py
index 35125b4ed82f3..6aa92b26679ef 100644
--- a/sklearn/tree/tree.py
+++ b/sklearn/tree/tree.py
@@ -24,7 +24,7 @@
 from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
 from ..externals import six
 from ..externals.six.moves import xrange
-from ..feature_selection.selector_mixin import SelectorMixin
+from ..feature_selection.from_model import _LearntSelectorMixin
 from ..utils import array2d, check_random_state
 from ..utils.validation import check_arrays
 
@@ -50,7 +50,7 @@
 
 
 class BaseDecisionTree(six.with_metaclass(ABCMeta, BaseEstimator,
-                                          SelectorMixin)):
+                                          _LearntSelectorMixin)):
     """Base class for decision trees.
 
     Warning: This class should not be used directly.
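
The extension point of the new SelectorMixin is _get_support_mask; transform,
inverse_transform and get_support then come for free. A minimal sketch of a
hypothetical third-party selector, in the spirit of the StepSelector used by
the new tests (VarianceSelector below is illustrative, not part of this
patch):

    import numpy as np

    from sklearn.base import BaseEstimator
    from sklearn.feature_selection.base import SelectorMixin


    class VarianceSelector(SelectorMixin, BaseEstimator):
        """Toy selector keeping features whose variance exceeds a cutoff."""

        def __init__(self, min_variance=0.0):
            self.min_variance = min_variance

        def fit(self, X, y=None):
            # Store only the statistics that _get_support_mask needs.
            self.variances_ = np.var(X, axis=0)
            return self

        def _get_support_mask(self):
            return self.variances_ > self.min_variance
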
From d1e5dcccc68aa135bf3a264e38dd07edc7b2fc2b Mon Sep 17 00:00:00 2001
From: Joel Nothman
Date: Tue, 21 May 2013 13:47:10 +1000
Subject: [PATCH 2/2] ENH Create FeatureSelectionMixin for shared
 [inverse_]transform code

Also rename FeatureSelectionMixin -> SelectorMixin -> _LearntSelectorMixin
And rename sklearn.feature_selection.{selector_mixin -> from_model}
---
 doc/whats_new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index b9a6a42540cd8..fa61d05c5ec35 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -88,6 +88,9 @@ Changelog
   - A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output
     incorrect probabilities has been fixed.
 
+  - Feature selectors now share a mixin providing consistent ``transform``,
+    ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
 API changes summary
 -------------------
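
A quick round trip illustrating the consistent behaviour the changelog entry
describes (a hypothetical snippet using one of the univariate selectors; any
SelectorMixin subclass behaves the same way):

    import numpy as np

    from sklearn.datasets import make_regression
    from sklearn.feature_selection import SelectKBest, f_regression

    X, y = make_regression(n_samples=50, n_features=8, random_state=0)
    kbest = SelectKBest(f_regression, k=3).fit(X, y)

    X_t = kbest.transform(X)              # shape (50, 3): selected columns only
    X_inv = kbest.inverse_transform(X_t)  # shape (50, 8): zeros where dropped
    assert np.all(X_inv[:, kbest.get_support()] == X_t)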