
FIX Add error when LeaveOneOut used in CalibratedClassifierCV #29545


Merged: 11 commits, Aug 10, 2024
7 changes: 7 additions & 0 deletions doc/whats_new/v1.5.rst
@@ -23,6 +23,13 @@ Version 1.5.2
Changelog
---------

:mod:`sklearn.calibration`
..........................

- |Fix| Raise an error when :class:`~sklearn.model_selection.LeaveOneOut` is used
  in `cv`, matching what would happen if `KFold(n_splits=n_samples)` was used.
  :pr:`29545` by :user:`Lucy Liu <lucyleeow>`.

:mod:`sklearn.compose`
......................

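As a rough illustration of the changelog entry above (not part of the diff; the toy two-class dataset and the LinearSVC base estimator are illustrative assumptions), both configurations below now raise a ValueError at fit time:

import numpy as np

from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import KFold, LeaveOneOut
from sklearn.svm import LinearSVC

X = np.random.randn(20, 4)
y = np.array([0] * 10 + [1] * 10)

for cv in (KFold(n_splits=len(X)), LeaveOneOut()):
    try:
        CalibratedClassifierCV(LinearSVC(), cv=cv).fit(X, y)
    except ValueError as exc:
        # KFold(n_splits=n_samples) hits the pre-existing
        # "Requesting 20-fold cross-validation ..." check, while
        # LeaveOneOut() hits the error added in this PR.
        print(f"{type(cv).__name__}: {exc}")
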
9 changes: 8 additions & 1 deletion sklearn/calibration.py
@@ -24,7 +24,7 @@
clone,
)
from .isotonic import IsotonicRegression
from .model_selection import check_cv, cross_val_predict
from .model_selection import LeaveOneOut, check_cv, cross_val_predict
from .preprocessing import LabelEncoder, label_binarize
from .svm import LinearSVC
from .utils import (
@@ -390,6 +390,13 @@ def fit(self, X, y, sample_weight=None, **fit_params):
"cross-validation but provided less than "
f"{n_folds} examples for at least one class."
)
if isinstance(self.cv, LeaveOneOut):
raise ValueError(
"LeaveOneOut cross-validation does not allow"
"all classes to be present in test splits. "
"Please use a cross-validation generator that allows "
"all classes to appear in every test and train split."
)
cv = check_cv(self.cv, y, classifier=True)

if self.ensemble:
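For background on the message added above, a minimal sketch (not part of the diff): every LeaveOneOut test split contains exactly one sample, so no test split can ever contain all classes.

import numpy as np

from sklearn.model_selection import LeaveOneOut

X = np.zeros((4, 1))
y = np.array([0, 0, 1, 1])

# Each LeaveOneOut test split holds a single sample, so it can never
# contain every class -- the situation the new error message refers to.
for train_idx, test_idx in LeaveOneOut().split(X):
    print("test indices:", test_idx, "test classes:", np.unique(y[test_idx]))
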
62 changes: 39 additions & 23 deletions sklearn/tests/test_calibration.py
@@ -146,6 +146,20 @@ def test_calibration_cv_splitter(data, ensemble):
assert len(calib_clf.calibrated_classifiers_) == expected_n_clf


def test_calibration_cv_nfold(data):
# Check an error is raised when the number of examples per class is less than n_folds
X, y = data

kfold = KFold(n_splits=101)
calib_clf = CalibratedClassifierCV(cv=kfold, ensemble=True)
with pytest.raises(ValueError, match="Requesting 101-fold cross-validation"):
calib_clf.fit(X, y)

calib_clf = CalibratedClassifierCV(cv=LeaveOneOut(), ensemble=True)
with pytest.raises(ValueError, match="LeaveOneOut cross-validation does"):
calib_clf.fit(X, y)


@pytest.mark.parametrize("method", ["sigmoid", "isotonic"])
@pytest.mark.parametrize("ensemble", [True, False])
def test_sample_weight(data, method, ensemble):
@@ -423,45 +437,47 @@ def test_calibration_nan_imputer(ensemble):

@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_prob_sum(ensemble):
# Test that sum of probabilities is 1. A non-regression test for
# issue #7796
num_classes = 2
X, y = make_classification(n_samples=10, n_features=5, n_classes=num_classes)
# Test that sum of probabilities is (max) 1. A non-regression test for
# issue #7796 - when test has fewer classes than train
X, _ = make_classification(n_samples=10, n_features=5, n_classes=2)
y = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
clf = LinearSVC(C=1.0, random_state=7)
# In the first and last fold, test will have 1 class while train will have 2
clf_prob = CalibratedClassifierCV(
clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble
clf, method="sigmoid", cv=KFold(n_splits=3), ensemble=ensemble
)
clf_prob.fit(X, y)

probs = clf_prob.predict_proba(X)
assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))
assert_allclose(clf_prob.predict_proba(X).sum(axis=1), 1.0)


@pytest.mark.parametrize("ensemble", [True, False])
def test_calibration_less_classes(ensemble):
# Test to check calibration works fine when train set in a test-train
# split does not contain all classes

Review comment (member): This is a use case where I'm really wondering if this is valid :). But this is here, so let's go with it.

# Since this test uses LOO, at each iteration train set will not contain a
# class label
X = np.random.randn(10, 5)
y = np.arange(10)
clf = LinearSVC(C=1.0, random_state=7)
# In 1st split, train is missing class 0
# In 3rd split, train is missing class 3
X = np.random.randn(12, 5)
y = [0, 0, 0, 1] + [1, 1, 2, 2] + [2, 3, 3, 3]
clf = DecisionTreeClassifier(random_state=7)
cal_clf = CalibratedClassifierCV(
clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble
clf, method="sigmoid", cv=KFold(3), ensemble=ensemble
)
cal_clf.fit(X, y)

for i, calibrated_classifier in enumerate(cal_clf.calibrated_classifiers_):
proba = calibrated_classifier.predict_proba(X)
if ensemble:
if ensemble:
classes = np.arange(4)
for calib_i, class_i in zip([0, 2], [0, 3]):
proba = cal_clf.calibrated_classifiers_[calib_i].predict_proba(X)
# Check that the unobserved class has proba=0
assert_array_equal(proba[:, i], np.zeros(len(y)))
assert_array_equal(proba[:, class_i], np.zeros(len(y)))
# Check for all other classes proba>0
assert np.all(proba[:, :i] > 0)
assert np.all(proba[:, i + 1 :] > 0)
else:
# Check `proba` are all 1/n_classes
Review comment (@lucyleeow, author, Jul 24, 2024): Note `proba` was 1/n_classes here because the original test data was unique (it consisted of 10 samples belonging to 10 classes), and this was not really related to the train subset not containing all classes. I think the estimator ended up overfit and the calibrator did not respond well to the low prediction values, calibrating them all to the same value. Note that `proba` was the same value even before normalization of the probabilities.

Review comment (@lucyleeow, author): I wonder if we even have to check `ensemble=False`, as we use `cross_val_predict` to get the predictions used to calibrate, and at predict time only one estimator, fit using all the data, is used (see the sketch at the end of this diff).

assert np.allclose(proba, 1 / proba.shape[0])
assert np.all(proba[:, classes != class_i] > 0)

# When `ensemble=False`, `cross_val_predict` is used to compute predictions
# to fit only one `calibrated_classifiers_`
else:
proba = cal_clf.calibrated_classifiers_[0].predict_proba(X)
assert_array_almost_equal(proba.sum(axis=1), np.ones(proba.shape[0]))


@pytest.mark.parametrize(
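Picking up the reviewer note above about ensemble=False, a minimal sketch of how the two modes differ in what ends up in calibrated_classifiers_ (the toy data and DecisionTreeClassifier are illustrative assumptions, not taken from the PR):

import numpy as np

from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier

X = np.random.randn(30, 5)
y = np.array([0, 1, 2] * 10)

# ensemble=True: one (estimator, calibrator) pair is fit per CV split.
ensembled = CalibratedClassifierCV(
    DecisionTreeClassifier(random_state=0), cv=KFold(3), ensemble=True
).fit(X, y)
print(len(ensembled.calibrated_classifiers_))  # 3

# ensemble=False: cross_val_predict supplies out-of-fold predictions to fit a
# single calibrator, and one estimator is refit on all of the data.
single = CalibratedClassifierCV(
    DecisionTreeClassifier(random_state=0), cv=KFold(3), ensemble=False
).fit(X, y)
print(len(single.calibrated_classifiers_))  # 1

With ensemble=False there is only a single calibrated pipeline to inspect at predict time, which is what the comment is getting at.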