doc/whats_new/v0.21.rst (+6 −0)
@@ -324,6 +324,12 @@ Support for Python 3.4 and below has been officially dropped.
of the estimators was set to ``None`` and ``sample_weight`` was not ``None``.
:pr:`13779` by :user:`Guillaume Lemaitre <glemaitre>`.

+- |API| :class:`ensemble.VotingClassifier` and
+  :class:`ensemble.VotingRegressor` accept ``'drop'`` to disable an estimator
+  in addition to ``None`` to be consistent with other estimators (i.e.,
+  :class:`pipeline.FeatureUnion` and :class:`compose.ColumnTransformer`).
+  :pr:`13780` by :user:`Guillaume Lemaitre <glemaitre>`.
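
For illustration only (an editor's sketch, not part of the diff; it assumes the
0.21 API described by this entry), the new value works anywhere ``None`` was
accepted:

    from sklearn.datasets import load_iris
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB

    X, y = load_iris(return_X_y=True)
    eclf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                        ('nb', GaussianNB())])
    # 'drop' now disables an estimator exactly as None already did
    eclf.set_params(lr='drop').fit(X, y)
    assert len(eclf.estimators_) == 1  # only the GaussianNB clone is fitted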

:mod:`sklearn.externals`
........................

sklearn/ensemble/tests/test_voting.py (+28 −12)
@@ -342,12 +342,25 @@ def test_sample_weight():
assert_array_equal(eclf3.predict(X), clf1.predict(X))
assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))

    # check that an informative error is raised if sample_weight is not
    # supported by the underlying estimator
clf4 = KNeighborsClassifier()
eclf3 = VotingClassifier(estimators=[
('lr', clf1), ('svc', clf3), ('knn', clf4)],
voting='soft')
-    msg = ('Underlying estimator \'knn\' does not support sample weights.')
-    assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
+    msg = ('Underlying estimator KNeighborsClassifier does not support '
+           'sample weights.')
+    with pytest.raises(ValueError, match=msg):
+        eclf3.fit(X, y, sample_weight)

+    # check that _parallel_fit_estimator will raise the right error
+    # it should raise the original error if this is not linked to sample_weight
+    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
+        def fit(self, X, y, sample_weight):
+            raise TypeError('Error unrelated to sample_weight.')
+    clf = ClassifierErrorFit()
+    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
+        clf.fit(X, y, sample_weight=sample_weight)


def test_sample_weight_kwargs():
@@ -404,8 +417,10 @@ def test_set_params():
@pytest.mark.filterwarnings('ignore: Default solver will be changed') # 0.22
@pytest.mark.filterwarnings('ignore: Default multi_class will') # 0.22
@pytest.mark.filterwarnings('ignore:The default value of n_estimators')
-def test_set_estimator_none():
-    """VotingClassifier set_params should be able to set estimators as None"""
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_set_estimator_none(drop):
+    """VotingClassifier set_params should be able to set estimators as None or
+    drop"""
# Test predict
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
Expand All @@ -417,22 +432,22 @@ def test_set_estimator_none():
eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
('nb', clf3)],
voting='hard', weights=[1, 1, 0.5])
-    eclf2.set_params(rf=None).fit(X, y)
+    eclf2.set_params(rf=drop).fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))

-    assert dict(eclf2.estimators)["rf"] is None
+    assert dict(eclf2.estimators)["rf"] is drop
assert len(eclf2.estimators_) == 2
assert all(isinstance(est, (LogisticRegression, GaussianNB))
for est in eclf2.estimators_)
-    assert eclf2.get_params()["rf"] is None
+    assert eclf2.get_params()["rf"] is drop

eclf1.set_params(voting='soft').fit(X, y)
eclf2.set_params(voting='soft').fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
-    msg = 'All estimators are None. At least one is required!'
+    msg = 'All estimators are None or "drop". At least one is required!'
assert_raise_message(
-        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
+        ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y)

# Test soft voting transform
X1 = np.array([[1], [2]])
Expand All @@ -444,7 +459,7 @@ def test_set_estimator_none():
eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
voting='soft', weights=[1, 0.5],
flatten_transform=False)
-    eclf2.set_params(rf=None).fit(X1, y1)
+    eclf2.set_params(rf=drop).fit(X1, y1)
assert_array_almost_equal(eclf1.transform(X1),
np.array([[[0.7, 0.3], [0.3, 0.7]],
[[1., 0.], [0., 1.]]]))
@@ -522,12 +537,13 @@ def test_transform():
[('lr', LinearRegression()),
('rf', RandomForestRegressor(n_estimators=5))]))]
)
-def test_none_estimator_with_weights(X, y, voter):
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_none_estimator_with_weights(X, y, voter, drop):
    # check that an estimator can be set to None or 'drop' while passing
    # some weights
# regression test for
# https://github.com/scikit-learn/scikit-learn/issues/13777
voter.fit(X, y, sample_weight=np.ones(y.shape))
-    voter.set_params(lr=None)
+    voter.set_params(lr=drop)
voter.fit(X, y, sample_weight=np.ones(y.shape))
y_pred = voter.predict(X)
assert y_pred.shape == y.shape
sklearn/ensemble/voting.py (+24 −20)
@@ -30,7 +30,15 @@
def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
"""Private function used to fit an estimator within a job."""
if sample_weight is not None:
-        estimator.fit(X, y, sample_weight=sample_weight)
+        try:
+            estimator.fit(X, y, sample_weight=sample_weight)
+        except TypeError as exc:
+            if "unexpected keyword argument 'sample_weight'" in str(exc):
+                raise ValueError(
+                    "Underlying estimator {} does not support sample weights."
+                    .format(estimator.__class__.__name__)
+                ) from exc
+            raise
else:
estimator.fit(X, y)
return estimator
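
To see the effect of this change (an editor's sketch, not part of the diff;
KNeighborsClassifier.fit genuinely has no ``sample_weight`` parameter, so its
TypeError is translated), fitting with weights now names the offending
estimator class:

    import numpy as np
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier

    X = np.array([[0.], [1.], [2.], [3.]])
    y = np.array([0, 0, 1, 1])
    eclf = VotingClassifier(estimators=[
        ('lr', LogisticRegression()),
        ('knn', KNeighborsClassifier(n_neighbors=1))])
    # Raises: ValueError: Underlying estimator KNeighborsClassifier does not
    # support sample weights.
    eclf.fit(X, y, sample_weight=np.ones(4))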
@@ -53,8 +61,8 @@ def _weights_not_none(self):
"""Get the weights of not `None` estimators"""
if self.weights is None:
return None
-        return [w for est, w in zip(self.estimators,
-                                    self.weights) if est[1] is not None]
+        return [w for est, w in zip(self.estimators, self.weights)
+                if est[1] not in (None, 'drop')]

def _predict(self, X):
"""Collect results from clf.predict calls. """
@@ -76,26 +84,22 @@ def fit(self, X, y, sample_weight=None):
'; got %d weights, %d estimators'
% (len(self.weights), len(self.estimators)))

-        if sample_weight is not None:
-            for name, step in self.estimators:
-                if step is None:
-                    continue
-                if not has_fit_parameter(step, 'sample_weight'):
-                    raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
-
names, clfs = zip(*self.estimators)
self._validate_names(names)

-        n_isnone = np.sum([clf is None for _, clf in self.estimators])
+        n_isnone = np.sum(
+            [clf in (None, 'drop') for _, clf in self.estimators]
+        )
if n_isnone == len(self.estimators):
-            raise ValueError('All estimators are None. At least one is '
-                             'required!')
+            raise ValueError(
+                'All estimators are None or "drop". At least one is required!'
+            )

self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_parallel_fit_estimator)(clone(clf), X, y,
sample_weight=sample_weight)
-            for clf in clfs if clf is not None)
+            for clf in clfs if clf not in (None, 'drop')
+        )

self.named_estimators_ = Bunch()
for k, e in zip(self.estimators, self.estimators_):
@@ -149,8 +153,8 @@ class VotingClassifier(_BaseVoting, ClassifierMixin):
estimators : list of (string, estimator) tuples
Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
of those original estimators that will be stored in the class attribute
-        ``self.estimators_``. An estimator can be set to `None` using
-        ``set_params``.
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
+        using ``set_params``.

voting : str, {'hard', 'soft'} (default='hard')
If 'hard', uses predicted class labels for majority rule voting.
@@ -381,9 +385,9 @@ class VotingRegressor(_BaseVoting, RegressorMixin):
Parameters
----------
estimators : list of (string, estimator) tuples
-        Invoking the ``fit`` method on the ``VotingRegressor`` will fit
-        clones of those original estimators that will be stored in the class
-        attribute ``self.estimators_``. An estimator can be set to `None`
+        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones
+        of those original estimators that will be stored in the class attribute
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
using ``set_params``.

weights : array-like, shape (n_regressors,), optional (default=`None`)