Skip to content

FIX Fix error when using Calibrated with Voting #20087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ Changelog
:class:`calibration.CalibratedClassifierCV` can now properly be used on
prefitted pipelines. :pr:`19641` by :user:`Alek Lefebvre <AlekLefebvre>`.

- |Fix| Fixed an error when using a :class:`ensemble.VotingClassifier`
as `base_estimator` in :class:`calibration.CalibratedClassifierCV`.
:pr:`20087` by :user:`Clément Fauchereau <clement-f>`.

:mod:`sklearn.cluster`
......................

Expand Down
34 changes: 20 additions & 14 deletions sklearn/calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,10 @@ def fit(self, X, y, sample_weight=None):
check_is_fitted(self.base_estimator, attributes=["classes_"])
self.classes_ = self.base_estimator.classes_

pred_method = _get_prediction_method(base_estimator)
pred_method, method_name = _get_prediction_method(base_estimator)
n_classes = len(self.classes_)
predictions = _compute_predictions(pred_method, X, n_classes)
predictions = _compute_predictions(pred_method, method_name, X,
n_classes)

calibrated_classifier = _fit_calibrator(
base_estimator, predictions, y, self.classes_, self.method,
Expand Down Expand Up @@ -310,12 +311,13 @@ def fit(self, X, y, sample_weight=None):
)
else:
this_estimator = clone(base_estimator)
method_name = _get_prediction_method(this_estimator).__name__
_, method_name = _get_prediction_method(this_estimator)
pred_method = partial(
cross_val_predict, estimator=this_estimator, X=X, y=y,
cv=cv, method=method_name, n_jobs=self.n_jobs
)
predictions = _compute_predictions(pred_method, X, n_classes)
predictions = _compute_predictions(pred_method, method_name, X,
n_classes)

if sample_weight is not None and supports_sw:
this_estimator.fit(X, y, sample_weight)
Expand Down Expand Up @@ -441,8 +443,9 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,
estimator.fit(X_train, y_train)

n_classes = len(classes)
pred_method = _get_prediction_method(estimator)
predictions = _compute_predictions(pred_method, X_test, n_classes)
pred_method, method_name = _get_prediction_method(estimator)
predictions = _compute_predictions(pred_method, method_name, X_test,
n_classes)

calibrated_classifier = _fit_calibrator(
estimator, predictions, y_test, classes, method, sample_weight=sw_test
Expand All @@ -465,18 +468,21 @@ def _get_prediction_method(clf):
-------
prediction_method : callable
The prediction method.
method_name : str
The name of the prediction method.
"""
if hasattr(clf, 'decision_function'):
method = getattr(clf, 'decision_function')
return method, 'decision_function'
elif hasattr(clf, 'predict_proba'):
method = getattr(clf, 'predict_proba')
return method, 'predict_proba'
else:
raise RuntimeError("'base_estimator' has no 'decision_function' or "
"'predict_proba' method.")
return method


def _compute_predictions(pred_method, X, n_classes):
def _compute_predictions(pred_method, method_name, X, n_classes):
"""Return predictions for `X` and reshape binary outputs to shape
(n_samples, 1).

Expand All @@ -485,6 +491,9 @@ def _compute_predictions(pred_method, X, n_classes):
pred_method : callable
Prediction method.

method_name : str
Name of the prediction method.

X : array-like or None
Data used to obtain predictions.

Expand All @@ -498,10 +507,6 @@ def _compute_predictions(pred_method, X, n_classes):
(X.shape[0], 1).
"""
predictions = pred_method(X=X)
if hasattr(pred_method, '__name__'):
method_name = pred_method.__name__
else:
method_name = signature(pred_method).parameters['method'].default

if method_name == 'decision_function':
if predictions.ndim == 1:
Expand Down Expand Up @@ -634,8 +639,9 @@ def predict_proba(self, X):
The predicted probabilities. Can be exact zeros.
"""
n_classes = len(self.classes)
pred_method = _get_prediction_method(self.base_estimator)
predictions = _compute_predictions(pred_method, X, n_classes)
pred_method, method_name = _get_prediction_method(self.base_estimator)
predictions = _compute_predictions(pred_method, method_name, X,
n_classes)

label_encoder = LabelEncoder().fit(self.classes)
pos_class_indices = label_encoder.transform(
Expand Down
20 changes: 19 additions & 1 deletion sklearn/tests/test_calibration.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import (RandomForestClassifier, RandomForestRegressor,
VotingClassifier)
from sklearn.svm import LinearSVC
from sklearn.isotonic import IsotonicRegression
from sklearn.feature_extraction import DictVectorizer
Expand Down Expand Up @@ -607,3 +608,20 @@ def test_calibrated_classifier_cv_deprecation(data):
calibrators, calib_clf.calibrated_classifiers_[0].calibrators
):
assert clf1 is clf2


def test_calibration_votingclassifier():
    """Smoke-test `CalibratedClassifierCV` on a prefit `VotingClassifier`.

    The method `predict_proba` from `VotingClassifier` is dynamically
    defined via a property that only works when voting="soft".
    """
    X, y = make_classification(
        n_samples=10, n_features=5, n_classes=2, random_state=7
    )

    # Three identically configured dummy estimators, named dummy0..dummy2.
    named_dummies = [
        ('dummy' + str(idx), DummyClassifier()) for idx in range(3)
    ]
    voter = VotingClassifier(estimators=named_dummies, voting="soft")
    voter.fit(X, y)

    calibrated = CalibratedClassifierCV(base_estimator=voter, cv="prefit")
    # Smoke test: fitting on the prefit ensemble should not raise an error.
    calibrated.fit(X, y)