diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 34e9f0670ba81..80fbc26919ae5 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -135,6 +135,10 @@ Changelog :class:`calibration.CalibratedClassifierCV` can now properly be used on prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre `. +- |Fix| Fixed an error when using a :class:`ensemble.VotingClassifier` + as `base_estimator` in :class:`calibration.CalibratedClassifierCV`. + :pr:`20087` by :user:`Clément Fauchereau `. + :mod:`sklearn.cluster` ...................... diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 084f3bf242e3c..abdbed1bb797b 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -257,9 +257,10 @@ def fit(self, X, y, sample_weight=None): check_is_fitted(self.base_estimator, attributes=["classes_"]) self.classes_ = self.base_estimator.classes_ - pred_method = _get_prediction_method(base_estimator) + pred_method, method_name = _get_prediction_method(base_estimator) n_classes = len(self.classes_) - predictions = _compute_predictions(pred_method, X, n_classes) + predictions = _compute_predictions(pred_method, method_name, X, + n_classes) calibrated_classifier = _fit_calibrator( base_estimator, predictions, y, self.classes_, self.method, @@ -310,12 +311,13 @@ def fit(self, X, y, sample_weight=None): ) else: this_estimator = clone(base_estimator) - method_name = _get_prediction_method(this_estimator).__name__ + _, method_name = _get_prediction_method(this_estimator) pred_method = partial( cross_val_predict, estimator=this_estimator, X=X, y=y, cv=cv, method=method_name, n_jobs=self.n_jobs ) - predictions = _compute_predictions(pred_method, X, n_classes) + predictions = _compute_predictions(pred_method, method_name, X, + n_classes) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) @@ -441,8 +443,9 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, estimator.fit(X_train, 
y_train) n_classes = len(classes) - pred_method = _get_prediction_method(estimator) - predictions = _compute_predictions(pred_method, X_test, n_classes) + pred_method, method_name = _get_prediction_method(estimator) + predictions = _compute_predictions(pred_method, method_name, X_test, + n_classes) calibrated_classifier = _fit_calibrator( estimator, predictions, y_test, classes, method, sample_weight=sw_test @@ -465,18 +468,21 @@ def _get_prediction_method(clf): ------- prediction_method : callable The prediction method. + method_name : str + The name of the prediction method. """ if hasattr(clf, 'decision_function'): method = getattr(clf, 'decision_function') + return method, 'decision_function' elif hasattr(clf, 'predict_proba'): method = getattr(clf, 'predict_proba') + return method, 'predict_proba' else: raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") - return method -def _compute_predictions(pred_method, X, n_classes): +def _compute_predictions(pred_method, method_name, X, n_classes): """Return predictions for `X` and reshape binary outputs to shape (n_samples, 1). @@ -485,6 +491,9 @@ def _compute_predictions(pred_method, X, n_classes): pred_method : callable Prediction method. + method_name: str + Name of the prediction method + X : array-like or None Data used to obtain predictions. @@ -498,10 +507,6 @@ def _compute_predictions(pred_method, X, n_classes): (X.shape[0], 1). """ predictions = pred_method(X=X) - if hasattr(pred_method, '__name__'): - method_name = pred_method.__name__ - else: - method_name = signature(pred_method).parameters['method'].default if method_name == 'decision_function': if predictions.ndim == 1: @@ -634,8 +639,9 @@ def predict_proba(self, X): The predicted probabilities. Can be exact zeros. 
""" n_classes = len(self.classes) - pred_method = _get_prediction_method(self.base_estimator) - predictions = _compute_predictions(pred_method, X, n_classes) + pred_method, method_name = _get_prediction_method(self.base_estimator) + predictions = _compute_predictions(pred_method, method_name, X, + n_classes) label_encoder = LabelEncoder().fit(self.classes) pos_class_indices = label_encoder.transform( diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 53d620b41031c..210d90f99f845 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -20,7 +20,8 @@ from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import KFold, cross_val_predict from sklearn.naive_bayes import MultinomialNB -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.ensemble import (RandomForestClassifier, RandomForestRegressor, + VotingClassifier) from sklearn.svm import LinearSVC from sklearn.isotonic import IsotonicRegression from sklearn.feature_extraction import DictVectorizer @@ -607,3 +608,20 @@ def test_calibrated_classifier_cv_deprecation(data): calibrators, calib_clf.calibrated_classifiers_[0].calibrators ): assert clf1 is clf2 + + +def test_calibration_votingclassifier(): + # Check that `CalibratedClassifier` works with `VotingClassifier`. + # The method `predict_proba` from `VotingClassifier` is dynamically + # defined via a property that only works when voting="soft". + X, y = make_classification(n_samples=10, n_features=5, + n_classes=2, random_state=7) + vote = VotingClassifier( + estimators=[('dummy'+str(i), DummyClassifier()) for i in range(3)], + voting="soft" + ) + vote.fit(X, y) + + calib_clf = CalibratedClassifierCV(base_estimator=vote, cv="prefit") + # smoke test: should not raise an error + calib_clf.fit(X, y)