
Commit f688e28

glemaitre authored and jnothman committed
API use 'drop' to disable estimators in voting (scikit-learn#13780)
1 parent f1995b2 commit f688e28

3 files changed: +58 −32 lines changed

doc/whats_new/v0.21.rst

Lines changed: 6 additions & 0 deletions
@@ -324,6 +324,12 @@ Support for Python 3.4 and below has been officially dropped.
   of the estimators was set to ``None`` and ``sample_weight`` was not ``None``.
   :pr:`13779` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- |API| :class:`ensemble.VotingClassifier` and
+  :class:`ensemble.VotingRegressor` accept ``'drop'`` to disable an estimator
+  in addition to ``None`` to be consistent with other estimators (i.e.,
+  :class:`pipeline.FeatureUnion` and :class:`compose.ColumnTransformer`).
+  :pr:`13780` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 :mod:`sklearn.externals`
 ........................
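A minimal sketch of the behavior this changelog entry describes (the dataset and estimator choices are illustrative, not part of the commit):

from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

X, y = load_iris(return_X_y=True)
eclf = VotingClassifier(
    estimators=[('lr', LogisticRegression(solver='lbfgs')),
                ('nb', GaussianNB())],
    voting='soft')
# 'drop' now disables an estimator, mirroring pipeline.FeatureUnion and
# compose.ColumnTransformer; None keeps working as before.
eclf.set_params(nb='drop').fit(X, y)
assert len(eclf.estimators_) == 1  # only the logistic regression was fitted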

sklearn/ensemble/tests/test_voting.py

Lines changed: 28 additions & 12 deletions
@@ -342,12 +342,25 @@ def test_sample_weight():
     assert_array_equal(eclf3.predict(X), clf1.predict(X))
     assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
 
+    # check that an error is raised and indicative if sample_weight is not
+    # supported.
     clf4 = KNeighborsClassifier()
     eclf3 = VotingClassifier(estimators=[
         ('lr', clf1), ('svc', clf3), ('knn', clf4)],
         voting='soft')
-    msg = ('Underlying estimator \'knn\' does not support sample weights.')
-    assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
+    msg = ('Underlying estimator KNeighborsClassifier does not support '
+           'sample weights.')
+    with pytest.raises(ValueError, match=msg):
+        eclf3.fit(X, y, sample_weight)
+
+    # check that _parallel_fit_estimator will raise the right error
+    # it should raise the original error if this is not linked to sample_weight
+    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
+        def fit(self, X, y, sample_weight):
+            raise TypeError('Error unrelated to sample_weight.')
+    clf = ClassifierErrorFit()
+    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
+        clf.fit(X, y, sample_weight=sample_weight)
 
 
 def test_sample_weight_kwargs():
@@ -404,8 +417,10 @@ def test_set_params():
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 @pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore:The default value of n_estimators')
-def test_set_estimator_none():
-    """VotingClassifier set_params should be able to set estimators as None"""
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_set_estimator_none(drop):
+    """VotingClassifier set_params should be able to set estimators as None or
+    drop"""
     # Test predict
     clf1 = LogisticRegression(random_state=123)
     clf2 = RandomForestClassifier(random_state=123)
@@ -417,22 +432,22 @@ def test_set_estimator_none():
     eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                          ('nb', clf3)],
                              voting='hard', weights=[1, 1, 0.5])
-    eclf2.set_params(rf=None).fit(X, y)
+    eclf2.set_params(rf=drop).fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
 
-    assert dict(eclf2.estimators)["rf"] is None
+    assert dict(eclf2.estimators)["rf"] is drop
     assert len(eclf2.estimators_) == 2
     assert all(isinstance(est, (LogisticRegression, GaussianNB))
                for est in eclf2.estimators_)
-    assert eclf2.get_params()["rf"] is None
+    assert eclf2.get_params()["rf"] is drop
 
     eclf1.set_params(voting='soft').fit(X, y)
     eclf2.set_params(voting='soft').fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
     assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
-    msg = 'All estimators are None. At least one is required!'
+    msg = 'All estimators are None or "drop". At least one is required!'
     assert_raise_message(
-        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
+        ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y)
 
     # Test soft voting transform
     X1 = np.array([[1], [2]])
@@ -444,7 +459,7 @@ def test_set_estimator_none():
     eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                              voting='soft', weights=[1, 0.5],
                              flatten_transform=False)
-    eclf2.set_params(rf=None).fit(X1, y1)
+    eclf2.set_params(rf=drop).fit(X1, y1)
     assert_array_almost_equal(eclf1.transform(X1),
                               np.array([[[0.7, 0.3], [0.3, 0.7]],
                                         [[1., 0.], [0., 1.]]]))
@@ -522,12 +537,13 @@ def test_transform():
         [('lr', LinearRegression()),
          ('rf', RandomForestRegressor(n_estimators=5))]))]
 )
-def test_none_estimator_with_weights(X, y, voter):
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_none_estimator_with_weights(X, y, voter, drop):
     # check that an estimator can be set to None and passing some weight
     # regression test for
     # https://github.com/scikit-learn/scikit-learn/issues/13777
     voter.fit(X, y, sample_weight=np.ones(y.shape))
-    voter.set_params(lr=None)
+    voter.set_params(lr=drop)
     voter.fit(X, y, sample_weight=np.ones(y.shape))
     y_pred = voter.predict(X)
     assert y_pred.shape == y.shape

sklearn/ensemble/voting.py

Lines changed: 24 additions & 20 deletions
@@ -30,7 +30,15 @@
 def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
     """Private function used to fit an estimator within a job."""
     if sample_weight is not None:
-        estimator.fit(X, y, sample_weight=sample_weight)
+        try:
+            estimator.fit(X, y, sample_weight=sample_weight)
+        except TypeError as exc:
+            if "unexpected keyword argument 'sample_weight'" in str(exc):
+                raise ValueError(
+                    "Underlying estimator {} does not support sample weights."
+                    .format(estimator.__class__.__name__)
+                ) from exc
+            raise
     else:
         estimator.fit(X, y)
     return estimator
@@ -53,8 +61,8 @@ def _weights_not_none(self):
         """Get the weights of not `None` estimators"""
         if self.weights is None:
             return None
-        return [w for est, w in zip(self.estimators,
-                                    self.weights) if est[1] is not None]
+        return [w for est, w in zip(self.estimators, self.weights)
+                if est[1] not in (None, 'drop')]
 
     def _predict(self, X):
         """Collect results from clf.predict calls. """
@@ -76,26 +84,22 @@ def fit(self, X, y, sample_weight=None):
                 '; got %d weights, %d estimators'
                 % (len(self.weights), len(self.estimators)))
 
-        if sample_weight is not None:
-            for name, step in self.estimators:
-                if step is None:
-                    continue
-                if not has_fit_parameter(step, 'sample_weight'):
-                    raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
-
         names, clfs = zip(*self.estimators)
         self._validate_names(names)
 
-        n_isnone = np.sum([clf is None for _, clf in self.estimators])
+        n_isnone = np.sum(
+            [clf in (None, 'drop') for _, clf in self.estimators]
+        )
         if n_isnone == len(self.estimators):
-            raise ValueError('All estimators are None. At least one is '
-                             'required!')
+            raise ValueError(
+                'All estimators are None or "drop". At least one is required!'
+            )
 
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
             delayed(_parallel_fit_estimator)(clone(clf), X, y,
                                              sample_weight=sample_weight)
-            for clf in clfs if clf is not None)
+            for clf in clfs if clf not in (None, 'drop')
+        )
 
         self.named_estimators_ = Bunch()
         for k, e in zip(self.estimators, self.estimators_):
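Since any name in ``estimators`` may now map to ``None`` or ``'drop'``, fit() raises as soon as nothing is left to train. A minimal sketch (toy data illustrative):

from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression

eclf = VotingClassifier(estimators=[('lr', LogisticRegression(solver='lbfgs'))])
try:
    # dropping the only estimator leaves nothing to fit
    eclf.set_params(lr='drop').fit([[0.], [1.]], [0, 1])
except ValueError as exc:
    print(exc)  # All estimators are None or "drop". At least one is required!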
@@ -149,8 +153,8 @@ class VotingClassifier(_BaseVoting, ClassifierMixin):
     estimators : list of (string, estimator) tuples
         Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
         of those original estimators that will be stored in the class attribute
-        ``self.estimators_``. An estimator can be set to `None` using
-        ``set_params``.
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
+        using ``set_params``.
 
     voting : str, {'hard', 'soft'} (default='hard')
         If 'hard', uses predicted class labels for majority rule voting.
@@ -381,9 +385,9 @@ class VotingRegressor(_BaseVoting, RegressorMixin):
     Parameters
     ----------
     estimators : list of (string, estimator) tuples
-        Invoking the ``fit`` method on the ``VotingRegressor`` will fit
-        clones of those original estimators that will be stored in the class
-        attribute ``self.estimators_``. An estimator can be set to `None`
+        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones
+        of those original estimators that will be stored in the class attribute
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
         using ``set_params``.
 
     weights : array-like, shape (n_regressors,), optional (default=`None`)
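The regressor documents the same contract as the classifier; a minimal sketch (data and estimators are illustrative):

import numpy as np
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

X = np.array([[1.], [2.], [3.], [4.]])
y = np.array([2., 4., 6., 8.])
vr = VotingRegressor(estimators=[('lr', LinearRegression()),
                                 ('tree', DecisionTreeRegressor())])
# 'drop' is now accepted alongside None to disable an estimator
vr.set_params(tree='drop').fit(X, y)
print(vr.predict(X))  # predictions come from the remaining estimator(s)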
