diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst
index 806e0b8cb354c..90c99e6c04c19 100644
--- a/doc/whats_new/v0.23.rst
+++ b/doc/whats_new/v0.23.rst
@@ -182,6 +182,13 @@ Changelog
   used during `fit`.
   :pr:`16437` by :user:`Jin-Hwan CHO `.
 
+- |Fix| Fixed a bug in :class:`ensemble.StackingClassifier` and
+  :class:`ensemble.StackingRegressor` where the `sample_weight`
+  argument was not being passed to `cross_val_predict` when
+  evaluating the base estimators on cross-validation folds
+  to obtain the input to the meta estimator.
+  :pr:`16539` by :user:`Bill DeRose `.
+
 :mod:`sklearn.feature_extraction`
 .................................
 
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index cd18a684a4518..ba817613523f6 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -122,6 +122,10 @@ def fit(self, X, y, sample_weight=None):
             Note that this is supported only if all underlying estimators
             support sample weights.
 
+            .. versionchanged:: 0.23
+               When not `None`, `sample_weight` is passed to all underlying
+               estimators.
+
         Returns
         -------
         self : object
@@ -166,10 +170,13 @@ def fit(self, X, y, sample_weight=None):
             self._method_name(name, est, meth)
             for name, est, meth in zip(names, all_estimators, stack_method)
         ]
-
+        fit_params = ({"sample_weight": sample_weight}
+                      if sample_weight is not None
+                      else None)
         predictions = Parallel(n_jobs=self.n_jobs)(
             delayed(cross_val_predict)(clone(est), X, y, cv=deepcopy(cv),
                                        method=meth, n_jobs=self.n_jobs,
+                                       fit_params=fit_params,
                                        verbose=self.verbose)
             for est, meth in zip(all_estimators, self.stack_method_)
             if est != 'drop'
@@ -183,21 +190,8 @@ def fit(self, X, y, sample_weight=None):
         ]
 
         X_meta = self._concatenate_predictions(X, predictions)
-        if sample_weight is not None:
-            try:
-                self.final_estimator_.fit(
-                    X_meta, y, sample_weight=sample_weight
-                )
-            except TypeError as exc:
-                if "unexpected keyword argument 'sample_weight'" in str(exc):
-                    raise TypeError(
-                        "Underlying estimator {} does not support sample "
-                        "weights."
-                        .format(self.final_estimator_.__class__.__name__)
-                    ) from exc
-                raise
-        else:
-            self.final_estimator_.fit(X_meta, y)
+        _fit_single_estimator(self.final_estimator_, X_meta, y,
+                              sample_weight=sample_weight)
 
         return self
 
diff --git a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py
index 1eff7ba5f7de7..f8a3f290e96b5 100644
--- a/sklearn/ensemble/tests/test_stacking.py
+++ b/sklearn/ensemble/tests/test_stacking.py
@@ -38,6 +38,7 @@
 from sklearn.model_selection import StratifiedKFold
 from sklearn.model_selection import KFold
 
+from sklearn.utils._mocking import CheckingClassifier
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import assert_allclose_dense_sparse
 from sklearn.utils._testing import ignore_warnings
@@ -439,6 +440,19 @@ def test_stacking_with_sample_weight(stacker, X, y):
     assert np.abs(y_pred_no_weight - y_pred_biased).sum() > 0
 
 
+def test_stacking_classifier_sample_weight_fit_param():
+    # check sample_weight is passed to all invocations of fit
+    stacker = StackingClassifier(
+        estimators=[
+            ('lr', CheckingClassifier(expected_fit_params=['sample_weight']))
+        ],
+        final_estimator=CheckingClassifier(
+            expected_fit_params=['sample_weight']
+        )
+    )
+    stacker.fit(X_iris, y_iris, sample_weight=np.ones(X_iris.shape[0]))
+
+
 @pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
 @pytest.mark.parametrize(
     "stacker, X, y",
diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py
index 25b60f7955b99..cff4183ea9bc4 100644
--- a/sklearn/utils/_mocking.py
+++ b/sklearn/utils/_mocking.py
@@ -95,6 +95,7 @@ def fit(self, X, y, **fit_params):
             assert self.check_X(X)
         if self.check_y is not None:
             assert self.check_y(y)
+        self.n_features_in_ = np.shape(X)[1]
         self.classes_ = np.unique(check_array(y, ensure_2d=False,
                                               allow_nd=True))
         if self.expected_fit_params:
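For anyone trying the change locally, here is a minimal sketch of the behavior this patch fixes; it is illustrative only and not part of the diff. Before the fix, the per-fold fits behind `cross_val_predict` ignored `sample_weight`, so the out-of-fold meta-features fed to the final estimator came from unweighted base estimators (only the full-data fits and the final estimator saw the weights); with the patch, the weights are forwarded via `fit_params`. The estimator and weighting choices below are arbitrary assumptions:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

stacker = StackingClassifier(
    estimators=[('tree', DecisionTreeClassifier(random_state=0))],
    final_estimator=LogisticRegression(),
    cv=5,
)

# Up-weight class 1, which overlaps with class 2, so the weighted and
# unweighted fits can actually differ.
weights = np.where(y == 1, 10.0, 1.0)

proba_unweighted = stacker.fit(X, y).predict_proba(X)
proba_weighted = stacker.fit(X, y, sample_weight=weights).predict_proba(X)

# With the fix, the weights also shape the cross-validated meta-features,
# not just the final logistic regression, so this difference is typically
# nonzero once the patch is applied.
print(np.abs(proba_unweighted - proba_weighted).max())
```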
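The new test leans on `sklearn.utils._mocking.CheckingClassifier`: constructed with `expected_fit_params`, its `fit` asserts that each named parameter is present in the received kwargs and has one entry per sample of `X`. A standalone sketch of that same mechanism applied directly to `cross_val_predict` (note this is a private test utility, so its API may change):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_predict
from sklearn.utils._mocking import CheckingClassifier

X, y = load_iris(return_X_y=True)

# fit() raises an AssertionError on any fold where `sample_weight` is
# missing from the fit kwargs, so completing without error demonstrates
# that fit_params are sliced and forwarded to every per-fold fit.
cross_val_predict(
    CheckingClassifier(expected_fit_params=['sample_weight']),
    X, y, cv=3,
    fit_params={'sample_weight': np.ones(len(y))},
)
```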