Skip to content

ENH Allow sample_weight and other fit_params in RFE #20380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
fe3e2a9
fit parameters for rfe
fbidu Jun 26, 2021
5bc6a3a
Add news entry for rfe fit params
fbidu Jun 26, 2021
bb0fa22
Update doc/whats_new/v1.0.rst
fbidu Jun 26, 2021
b8db648
Update doc/whats_new/v1.0.rst
fbidu Jun 27, 2021
f9d0aee
Update sklearn/feature_selection/tests/test_rfe.py
fbidu Jun 27, 2021
6363257
Add @g-rutter as an author
fbidu Jun 27, 2021
0481d97
Refactor test_rfe_sample_weights
fbidu Jun 27, 2021
64a2401
Fix ranking_ attribute
fbidu Jun 27, 2021
d27c370
Fix ranking_ attribute
fbidu Jun 27, 2021
df4673d
Finish test refactor
fbidu Jun 27, 2021
2867efc
Merge branch 'fit-params-for-rfe' of github.com:fbidu/scikit-learn in…
fbidu Jun 27, 2021
2b8ee3a
Remove unused import
fbidu Jun 27, 2021
9f29e57
Improve RFE test and scoring with weights
fbidu Jul 24, 2021
d4d97d4
Remove old RFE test
fbidu Jul 24, 2021
96a32dc
Add adrinjalali as co-author
fbidu Jul 24, 2021
01d9f2d
Merge branch 'main' into fit-params-for-rfe
fbidu Jul 24, 2021
0819916
Remove unused imports
fbidu Jul 24, 2021
282689d
Merge branch 'fit-params-for-rfe' of github.com:fbidu/scikit-learn in…
fbidu Jul 24, 2021
6d88cc5
Update doc/whats_new/v1.0.rst
fbidu Jul 25, 2021
9d7235b
Update sklearn/feature_selection/_rfe.py
fbidu Jul 25, 2021
6f69ede
Update doc/whats_new/v1.0.rst
fbidu Jul 25, 2021
3924e1d
Update sklearn/feature_selection/_rfe.py
fbidu Jul 25, 2021
efbece3
Update sklearn/feature_selection/_rfe.py
fbidu Jul 25, 2021
e378755
Update sklearn/feature_selection/_rfe.py
fbidu Jul 25, 2021
285959e
Apply suggestions from code review
glemaitre Jul 27, 2021
65b3489
flake8
glemaitre Jul 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,11 @@ Changelog
when the variance threshold is negative.
:pr:`20207` by :user:`Tomohiro Endo <europeanplaice>`

- |Enhancement| :meth:`feature_selection.RFE.fit` accepts additional estimator
parameters that are passed directly to the estimator's `fit` method.
:pr:`20380` by :user:`Iván Pulido <ijpulidos>`, :user:`Felipe Bidu <fbidu>`,
:user:`Gil Rutter <g-rutter>`, and :user:`Adrin Jalali <adrinjalali>`.

- |FIX| Fix a bug in :func:`isotonic.isotonic_regression` where the
`sample_weight` passed by a user was overwritten during the fit.
:pr:`20515` by :user:`Carsten Allefeld <allefeld>`.
Expand Down
24 changes: 17 additions & 7 deletions sklearn/feature_selection/_rfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def classes_(self):
"""
return self.estimator_.classes_

def fit(self, X, y):
def fit(self, X, y, **fit_params):
"""Fit the RFE model and then the underlying estimator on the selected features.

Parameters
Expand All @@ -203,14 +203,18 @@ def fit(self, X, y):
y : array-like of shape (n_samples,)
The target values.

**fit_params : dict
Additional parameters passed to the `fit` method of the underlying
estimator.

Returns
-------
self : object
Fitted estimator.
"""
return self._fit(X, y)
return self._fit(X, y, **fit_params)

def _fit(self, X, y, step_score=None):
def _fit(self, X, y, step_score=None, **fit_params):
# Parameter step_score controls the calculation of self.scores_
# step_score is not exposed to users
# and is used when implementing RFECV
Expand Down Expand Up @@ -269,7 +273,7 @@ def _fit(self, X, y, step_score=None):
if self.verbose > 0:
print("Fitting estimator with %d features." % np.sum(support_))

estimator.fit(X[:, features], y)
estimator.fit(X[:, features], y, **fit_params)

# Get importance and rank them
importances = _get_feature_importances(
Expand All @@ -296,7 +300,7 @@ def _fit(self, X, y, step_score=None):
# Set final attributes
features = np.arange(n_features)[support_]
self.estimator_ = clone(self.estimator)
self.estimator_.fit(X[:, features], y)
self.estimator_.fit(X[:, features], y, **fit_params)

# Compute step score when only n_features_to_select features left
if step_score:
Expand Down Expand Up @@ -325,7 +329,7 @@ def predict(self, X):
return self.estimator_.predict(self.transform(X))

@if_delegate_has_method(delegate="estimator")
def score(self, X, y):
def score(self, X, y, **fit_params):
"""Reduce X to the selected features and return the score of the underlying estimator.

Parameters
Expand All @@ -336,14 +340,20 @@ def score(self, X, y):
y : array of shape [n_samples]
The target values.

**fit_params : dict
Parameters to pass to the `score` method of the underlying
estimator.

.. versionadded:: 1.0

Returns
-------
score : float
Score of the underlying base estimator computed with the selected
features returned by `rfe.transform(X)` and `y`.
"""
check_is_fitted(self)
return self.estimator_.score(self.transform(X), y)
return self.estimator_.score(self.transform(X), y, **fit_params)

def _get_support_mask(self):
check_is_fitted(self)
Expand Down
26 changes: 26 additions & 0 deletions sklearn/feature_selection/tests/test_rfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy import sparse

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.feature_selection import RFE, RFECV
from sklearn.datasets import load_iris, make_friedman1
from sklearn.metrics import zero_one_loss
Expand Down Expand Up @@ -108,6 +109,31 @@ def test_rfe():
assert_array_almost_equal(X_r, X_r_sparse.toarray())


def test_RFE_fit_score_params():
    # RFE must forward extra keyword arguments it receives in `fit` and
    # `score` to the corresponding methods of the wrapped estimator.
    class TestEstimator(BaseEstimator, ClassifierMixin):
        # Thin wrapper around a linear SVC that refuses to run unless the
        # `prop` keyword reaches it, so a dropped argument surfaces as an error.
        def fit(self, X, y, prop=None):
            if prop is None:
                raise ValueError("fit: prop cannot be None")
            inner = SVC(kernel="linear")
            self.svc_ = inner.fit(X, y)
            # RFE ranks features from `coef_`, so expose the SVC's coefficients.
            self.coef_ = self.svc_.coef_
            return self

        def score(self, X, y, prop=None):
            if prop is None:
                raise ValueError("score: prop cannot be None")
            return self.svc_.score(X, y)

    def make_selector():
        # A fresh, unfitted selector for every scenario below.
        return RFE(estimator=TestEstimator())

    X, y = load_iris(return_X_y=True)

    # Omitting `prop` at fit time must trigger the estimator's fit error.
    with pytest.raises(ValueError, match="fit: prop cannot be None"):
        make_selector().fit(X, y)

    # Supplying `prop` to fit but not to score must trigger the score error.
    with pytest.raises(ValueError, match="score: prop cannot be None"):
        fitted = make_selector().fit(X, y, prop="foo")
        fitted.score(X, y)

    # Supplying `prop` to both calls must run without raising.
    fitted = make_selector().fit(X, y, prop="foo")
    fitted.score(X, y, prop="foo")


@pytest.mark.parametrize("n_features_to_select", [-1, 2.1])
def test_rfe_invalid_n_features_errors(n_features_to_select):
clf = SVC(kernel="linear")
Expand Down