diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index 3dcb8b31773c7..21b9ee1c9b6a2 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -324,6 +324,12 @@ Support for Python 3.4 and below has been officially dropped.
     of the estimators was set to ``None`` and ``sample_weight`` was not
     ``None``.
     :pr:`13779` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- |API| :class:`ensemble.VotingClassifier` and
+  :class:`ensemble.VotingRegressor` accept ``'drop'`` to disable an estimator
+  in addition to ``None`` to be consistent with other estimators (i.e.,
+  :class:`pipeline.FeatureUnion` and :class:`compose.ColumnTransformer`).
+  :pr:`13780` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 :mod:`sklearn.externals`
 ........................
diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py
index 0a4a9eab8c160..b2b16cf8eeec3 100644
--- a/sklearn/ensemble/tests/test_voting.py
+++ b/sklearn/ensemble/tests/test_voting.py
@@ -342,12 +342,25 @@ def test_sample_weight():
     assert_array_equal(eclf3.predict(X), clf1.predict(X))
     assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
 
+    # check that an error is raised and indicative if sample_weight is not
+    # supported.
     clf4 = KNeighborsClassifier()
     eclf3 = VotingClassifier(estimators=[
         ('lr', clf1), ('svc', clf3), ('knn', clf4)],
         voting='soft')
-    msg = ('Underlying estimator \'knn\' does not support sample weights.')
-    assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
+    msg = ('Underlying estimator KNeighborsClassifier does not support '
+           'sample weights.')
+    with pytest.raises(ValueError, match=msg):
+        eclf3.fit(X, y, sample_weight)
+
+    # check that _parallel_fit_estimator will raise the right error
+    # it should raise the original error if this is not linked to sample_weight
+    class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
+        def fit(self, X, y, sample_weight):
+            raise TypeError('Error unrelated to sample_weight.')
+    clf = ClassifierErrorFit()
+    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
+        clf.fit(X, y, sample_weight=sample_weight)
 
 
 def test_sample_weight_kwargs():
@@ -404,8 +417,10 @@ def test_set_params():
 @pytest.mark.filterwarnings('ignore: Default solver will be changed')  # 0.22
 @pytest.mark.filterwarnings('ignore: Default multi_class will')  # 0.22
 @pytest.mark.filterwarnings('ignore:The default value of n_estimators')
-def test_set_estimator_none():
-    """VotingClassifier set_params should be able to set estimators as None"""
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_set_estimator_none(drop):
+    """VotingClassifier set_params should be able to set estimators as None or
+    drop"""
     # Test predict
     clf1 = LogisticRegression(random_state=123)
     clf2 = RandomForestClassifier(random_state=123)
@@ -417,22 +432,22 @@ def test_set_estimator_none():
     eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
                                          ('nb', clf3)],
                              voting='hard', weights=[1, 1, 0.5])
-    eclf2.set_params(rf=None).fit(X, y)
+    eclf2.set_params(rf=drop).fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
 
-    assert dict(eclf2.estimators)["rf"] is None
+    assert dict(eclf2.estimators)["rf"] is drop
     assert len(eclf2.estimators_) == 2
     assert all(isinstance(est, (LogisticRegression, GaussianNB))
                for est in eclf2.estimators_)
-    assert eclf2.get_params()["rf"] is None
+    assert eclf2.get_params()["rf"] is drop
 
     eclf1.set_params(voting='soft').fit(X, y)
     eclf2.set_params(voting='soft').fit(X, y)
     assert_array_equal(eclf1.predict(X), eclf2.predict(X))
     assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
-    msg = 'All estimators are None. At least one is required!'
+    msg = 'All estimators are None or "drop". At least one is required!'
     assert_raise_message(
-        ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
+        ValueError, msg, eclf2.set_params(lr=drop, rf=drop, nb=drop).fit, X, y)
 
     # Test soft voting transform
     X1 = np.array([[1], [2]])
@@ -444,7 +459,7 @@ def test_set_estimator_none():
     eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
                              voting='soft', weights=[1, 0.5],
                              flatten_transform=False)
-    eclf2.set_params(rf=None).fit(X1, y1)
+    eclf2.set_params(rf=drop).fit(X1, y1)
     assert_array_almost_equal(eclf1.transform(X1),
                               np.array([[[0.7, 0.3], [0.3, 0.7]],
                                         [[1., 0.], [0., 1.]]]))
@@ -522,12 +537,13 @@ def test_transform():
          [('lr', LinearRegression()),
           ('rf', RandomForestRegressor(n_estimators=5))]))]
 )
-def test_none_estimator_with_weights(X, y, voter):
+@pytest.mark.parametrize("drop", [None, 'drop'])
+def test_none_estimator_with_weights(X, y, voter, drop):
     # check that an estimator can be set to None and passing some weight
     # regression test for
     # https://github.com/scikit-learn/scikit-learn/issues/13777
     voter.fit(X, y, sample_weight=np.ones(y.shape))
-    voter.set_params(lr=None)
+    voter.set_params(lr=drop)
     voter.fit(X, y, sample_weight=np.ones(y.shape))
     y_pred = voter.predict(X)
     assert y_pred.shape == y.shape
diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py
index d8e14b152d3ab..f60bb8f49b81d 100644
--- a/sklearn/ensemble/voting.py
+++ b/sklearn/ensemble/voting.py
@@ -30,7 +30,15 @@
 def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
     """Private function used to fit an estimator within a job."""
     if sample_weight is not None:
-        estimator.fit(X, y, sample_weight=sample_weight)
+        try:
+            estimator.fit(X, y, sample_weight=sample_weight)
+        except TypeError as exc:
+            if "unexpected keyword argument 'sample_weight'" in str(exc):
+                raise ValueError(
+                    "Underlying estimator {} does not support sample weights."
+                    .format(estimator.__class__.__name__)
+                ) from exc
+            raise
     else:
         estimator.fit(X, y)
     return estimator
@@ -53,8 +61,8 @@ def _weights_not_none(self):
         """Get the weights of not `None` estimators"""
         if self.weights is None:
             return None
-        return [w for est, w in zip(self.estimators,
-                                    self.weights) if est[1] is not None]
+        return [w for est, w in zip(self.estimators, self.weights)
+                if est[1] not in (None, 'drop')]
 
     def _predict(self, X):
         """Collect results from clf.predict calls. """
@@ -76,26 +84,22 @@ def fit(self, X, y, sample_weight=None):
                              '; got %d weights, %d estimators'
                              % (len(self.weights), len(self.estimators)))
 
-        if sample_weight is not None:
-            for name, step in self.estimators:
-                if step is None:
-                    continue
-                if not has_fit_parameter(step, 'sample_weight'):
-                    raise ValueError('Underlying estimator \'%s\' does not'
-                                     ' support sample weights.' % name)
-
         names, clfs = zip(*self.estimators)
         self._validate_names(names)
 
-        n_isnone = np.sum([clf is None for _, clf in self.estimators])
+        n_isnone = np.sum(
+            [clf in (None, 'drop') for _, clf in self.estimators]
+        )
         if n_isnone == len(self.estimators):
-            raise ValueError('All estimators are None. At least one is '
-                             'required!')
+            raise ValueError(
+                'All estimators are None or "drop". At least one is required!'
+            )
 
         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
                 delayed(_parallel_fit_estimator)(clone(clf), X, y,
                                                  sample_weight=sample_weight)
-                for clf in clfs if clf is not None)
+                for clf in clfs if clf not in (None, 'drop')
+            )
 
         self.named_estimators_ = Bunch()
         for k, e in zip(self.estimators, self.estimators_):
@@ -149,8 +153,8 @@ class VotingClassifier(_BaseVoting, ClassifierMixin):
     estimators : list of (string, estimator) tuples
         Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
         of those original estimators that will be stored in the class attribute
-        ``self.estimators_``. An estimator can be set to `None` using
-        ``set_params``.
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
+        using ``set_params``.
 
     voting : str, {'hard', 'soft'} (default='hard')
         If 'hard', uses predicted class labels for majority rule voting.
@@ -381,9 +385,9 @@ class VotingRegressor(_BaseVoting, RegressorMixin):
     Parameters
     ----------
     estimators : list of (string, estimator) tuples
-        Invoking the ``fit`` method on the ``VotingRegressor`` will fit
-        clones of those original estimators that will be stored in the class
-        attribute ``self.estimators_``. An estimator can be set to `None`
+        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones
+        of those original estimators that will be stored in the class attribute
+        ``self.estimators_``. An estimator can be set to ``None`` or ``'drop'``
         using ``set_params``.
 
     weights : array-like, shape (n_regressors,), optional (default=`None`)
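For context, a minimal sketch of the behaviour the patch enables (not part of the diff; the toy data and estimator choices are hypothetical): ``'drop'`` now disables an estimator exactly as ``None`` does, and ``_weights_not_none`` also discards the weight attached to a dropped estimator, so the remaining weights stay aligned.

```python
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Hypothetical toy data, only to make the snippet self-contained.
X = np.array([[-2, -1], [-1, -1], [1, 1], [2, 1]])
y = np.array([0, 0, 1, 1])

eclf = VotingClassifier(estimators=[('lr', LogisticRegression()),
                                    ('nb', GaussianNB())],
                        voting='soft', weights=[1, 2])

# 'drop' behaves like None: the estimator is skipped at fit time and its
# weight is ignored, so only the GaussianNB clone ends up in estimators_.
eclf.set_params(lr='drop').fit(X, y)
assert len(eclf.estimators_) == 1
print(eclf.predict(X))  # [0 0 1 1]
```

Since ``None`` keeps working alongside ``'drop'`` (the tests are parametrized over both), existing code is unaffected.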
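Likewise, a sketch of the reworked ``sample_weight`` validation (again not part of the diff): the up-front ``has_fit_parameter`` check is replaced by catching the ``TypeError`` inside ``_parallel_fit_estimator``, so the ``ValueError`` now names the estimator class rather than its label, while unrelated ``TypeError`` raised by ``fit`` propagates unchanged.

```python
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Hypothetical toy data, as above.
X = np.array([[-2, -1], [-1, -1], [1, 1], [2, 1]])
y = np.array([0, 0, 1, 1])

eclf = VotingClassifier(estimators=[
    ('lr', LogisticRegression()),
    ('knn', KNeighborsClassifier(n_neighbors=1))])

try:
    # KNeighborsClassifier.fit() has no sample_weight parameter; the
    # resulting TypeError is re-raised as a more indicative ValueError.
    eclf.fit(X, y, sample_weight=np.ones(y.shape))
except ValueError as exc:
    print(exc)
    # Underlying estimator KNeighborsClassifier does not support
    # sample weights.
```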