From 2f6de703d6ca3f3f1c1f9eae726d02ec436c047e Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Thu, 25 Feb 2021 10:49:14 +0100
Subject: [PATCH 01/15] Test and doc for n_features_in_ for sklearn.calibration

---
 sklearn/calibration.py                     | 3 +++
 sklearn/tests/test_common.py               | 1 -
 sklearn/tests/test_docstring_parameters.py | 1 -
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index b60a415b4419b..85decdd76ee84 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -141,6 +141,9 @@ class CalibratedClassifierCV(ClassifierMixin,
     classes_ : ndarray of shape (n_classes,)
         The class labels.
 
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
     calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \
             or `ensemble=False`)
         The list of classifier and calibrator pairs.
diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py
index bfd7f98268350..45943ad0b007c 100644
--- a/sklearn/tests/test_common.py
+++ b/sklearn/tests/test_common.py
@@ -261,7 +261,6 @@ def test_search_cv(estimator, check, request):
 #
 # check_classifiers_train would need to be updated with the error message
 N_FEATURES_IN_AFTER_FIT_MODULES_TO_IGNORE = {
-    'calibration',
     'compose',
     'feature_extraction',
     'mixture',
diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py
index cd2bdba449799..5c2e02bd3e247 100644
--- a/sklearn/tests/test_docstring_parameters.py
+++ b/sklearn/tests/test_docstring_parameters.py
@@ -176,7 +176,6 @@ def _construct_searchcv_instance(SearchCV):
 
 
 N_FEATURES_MODULES_TO_IGNORE = {
-    'calibration',
     'cluster',
     'compose',
     'covariance',

From 24d8e90b3e494b87af8752a799c3631ee09a2911 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 16:49:05 +0100
Subject: [PATCH 02/15] CalibratedClassifierCV delegates n_features_in_
 validation to base_estimator

---
 sklearn/calibration.py            | 40 +++++++++++++++++++++----------
 sklearn/tests/test_calibration.py | 22 ++++++++++++++---
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 85decdd76ee84..96c1331c381e5 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -33,6 +33,8 @@
 from .utils.fixes import delayed
 from .utils.validation import check_is_fitted, check_consistent_length
 from .utils.validation import _check_sample_weight, _num_samples
+from .utils.validation import _num_features
+from .utils import _safe_indexing
 from .pipeline import Pipeline
 from .isotonic import IsotonicRegression
 from .svm import LinearSVC
@@ -144,6 +146,8 @@ class CalibratedClassifierCV(ClassifierMixin,
     n_features_in_ : int
         Number of features seen during :term:`fit`.
 
+        .. versionadded:: 0.24
+
     calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \
             or `ensemble=False`)
         The list of classifier and calibrator pairs.
@@ -259,8 +263,20 @@ def fit(self, X, y, sample_weight=None):
                 check_is_fitted(self.base_estimator[-1])
             else:
                 check_is_fitted(self.base_estimator)
+            try:
+                self.n_features_in_ = _num_features(X)
+            except TypeError:
+                # X is not necessarily tabular as base_estimator might be able
+                # to accept non-tabular data.
+                pass
             with suppress(AttributeError):
-                self.n_features_in_ = base_estimator.n_features_in_
+                if self.n_features_in_ != base_estimator.n_features_in_:
+                    raise ValueError(
+                        f"Base estimator {base_estimator.__class__.__name__} "
+                        f"was prefit on {base_estimator.n_features_in_} "
+                        f"features but CalibratedClassifierCV is fit "
+                        f"with {self.n_features_in_} features."
+                    )
             self.classes_ = self.base_estimator.classes_
 
             pred_method = _get_prediction_method(base_estimator)
@@ -273,10 +289,6 @@ def fit(self, X, y, sample_weight=None):
             )
             self.calibrated_classifiers_.append(calibrated_classifier)
         else:
-            X, y = self._validate_data(
-                X, y, accept_sparse=['csc', 'csr', 'coo'],
-                force_all_finite=False, allow_nd=True
-            )
             # Set `classes_` using all `y`
             label_encoder_ = LabelEncoder().fit(y)
             self.classes_ = label_encoder_.classes_
@@ -337,6 +349,9 @@ def fit(self, X, y, sample_weight=None):
                 )
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
+            first_clf = next(iter(self.calibrated_classifiers_)).base_estimator
+            if hasattr(first_clf, "n_features_in_"):
+                self.n_features_in_ = first_clf.n_features_in_
         return self
 
     def predict_proba(self, X):
@@ -355,7 +370,6 @@ def predict_proba(self, X):
             The predicted probas.
         """
         check_is_fitted(self)
-
         # Compute the arithmetic mean of the predictions of the calibrated
         # classifiers
         mean_proba = np.zeros((_num_samples(X), len(self.classes_)))
@@ -434,19 +448,21 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,
     -------
     calibrated_classifier : _CalibratedClassifier instance
     """
+    X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train)
+    X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test)
     if sample_weight is not None and supports_sw:
-        estimator.fit(X[train], y[train],
-                      sample_weight=sample_weight[train])
+        estimator.fit(X_train, y_train,
+                      sample_weight=_safe_indexing(sample_weight, train))
     else:
-        estimator.fit(X[train], y[train])
+        estimator.fit(X_train, y_train)
 
     n_classes = len(classes)
     pred_method = _get_prediction_method(estimator)
-    predictions = _compute_predictions(pred_method, X[test], n_classes)
+    predictions = _compute_predictions(pred_method, X_test, n_classes)
 
-    sw = None if sample_weight is None else sample_weight[test]
+    sw = None if sample_weight is None else _safe_indexing(sample_weight, test)
     calibrated_classifier = _fit_calibrator(
-        estimator, predictions, y[test], classes, method, sample_weight=sw
+        estimator, predictions, y_test, classes, method, sample_weight=sw
     )
     return calibrated_classifier
 
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 86a638c4a7679..c870a747c576e 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -2,6 +2,7 @@
 # License: BSD 3 clause
 
 import pytest
+import re
 import numpy as np
 from numpy.testing import assert_allclose
 from scipy import sparse
@@ -14,6 +15,7 @@
                                     assert_almost_equal,
                                     assert_array_equal,
                                     ignore_warnings)
+from sklearn.utils.validation import _num_features
 from sklearn.utils.extmath import softmax
 from sklearn.exceptions import NotFittedError
 from sklearn.datasets import make_classification, make_blobs
@@ -547,9 +549,7 @@ def test_calibration_pipeline(text_data, text_data_pipeline):
     calib_clf.fit(X, y)
     # Check attributes are obtained from fitted estimator
     assert_array_equal(calib_clf.classes_, clf.classes_)
-    msg = "'CalibratedClassifierCV' object has no attribute"
-    with pytest.raises(AttributeError, match=msg):
-        calib_clf.n_features_in_
+    assert _num_features(X) == calib_clf.n_features_in_
 
     # Ensure that no error is thrown with predict and predict_proba
     calib_clf.predict(X)
@@ -578,6 +578,22 @@ def test_calibration_attributes(clf, cv):
         assert calib_clf.n_features_in_ == X.shape[1]
 
 
+def test_calibration_inconsistent_prefit_n_features_in():
+    # Check that `n_features_in_` from prefit base estimator
+    # is consistent with training set
+    X, y = make_classification(n_samples=10, n_features=5,
+                               n_classes=2, random_state=7)
+    clf = LinearSVC(C=1).fit(X, y)
+    calib_clf = CalibratedClassifierCV(clf, cv='prefit')
+
+    msg = re.escape(
+        "Base estimator LinearSVC was prefit on 5 features "
+        "but CalibratedClassifierCV is fit with 3 features."
+    )
+    with pytest.raises(ValueError, match=msg):
+        calib_clf.fit(X[:, :3], y)
+
+
 # FIXME: remove in 1.1
 def test_calibrated_classifier_cv_deprecation(data):
     # Check that we raise the proper deprecation warning if accessing

From 4a2ce3f0dd98cf4d8e934b9efa32ead67fdb8689 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 18:02:00 +0100
Subject: [PATCH 03/15] Better n_features_in_ handling

---
 sklearn/calibration.py            | 21 ++++++++++----------
 sklearn/tests/test_calibration.py | 33 ++++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 96c1331c381e5..8ae9397466543 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -144,7 +144,8 @@ class CalibratedClassifierCV(ClassifierMixin,
         The class labels.
 
     n_features_in_ : int
-        Number of features seen during :term:`fit`.
+        Number of features seen during :term:`fit`. Only defined if the
+        underlying base_estimator exposes such an attribute when fit.
 
         .. versionadded:: 0.24
 
@@ -263,20 +264,20 @@ def fit(self, X, y, sample_weight=None):
                 check_is_fitted(self.base_estimator[-1])
             else:
                 check_is_fitted(self.base_estimator)
-            try:
-                self.n_features_in_ = _num_features(X)
-            except TypeError:
-                # X is not necessarily tabular as base_estimator might be able
-                # to accept non-tabular data.
-                pass
             with suppress(AttributeError):
-                if self.n_features_in_ != base_estimator.n_features_in_:
+                # Only perform the check and set self.n_features_in_ if the
+                # base estimator has the attribute defined.
+                expected_n_features_in = base_estimator.n_features_in_
+                actual_n_features_in = _num_features(X)
+                if expected_n_features_in != actual_n_features_in:
                     raise ValueError(
                         f"Base estimator {base_estimator.__class__.__name__} "
-                        f"was prefit on {base_estimator.n_features_in_} "
+                        f"was prefit on {expected_n_features_in} "
                         f"features but CalibratedClassifierCV is fit "
-                        f"with {self.n_features_in_} features."
+                        f"with {actual_n_features_in} features."
                     )
+                else:
+                    self.n_features_in_ = actual_n_features_in
             self.classes_ = self.base_estimator.classes_
 
             pred_method = _get_prediction_method(base_estimator)
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index c870a747c576e..281ff240367aa 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -514,19 +514,19 @@ def decision_function(self, X):
 
 
 @pytest.fixture
-def text_data():
-    text_data = [
+def dict_data():
+    dict_data = [
         {'state': 'NY', 'age': 'adult'},
         {'state': 'TX', 'age': 'adult'},
         {'state': 'VT', 'age': 'child'},
     ]
     text_labels = [1, 0, 1]
-    return text_data, text_labels
+    return dict_data, text_labels
 
 
 @pytest.fixture
-def text_data_pipeline(text_data):
-    X, y = text_data
+def dict_data_pipeline(dict_data):
+    X, y = dict_data
     pipeline_prefit = Pipeline([
         ('vectorizer', DictVectorizer()),
         ('clf', RandomForestClassifier())
@@ -534,7 +534,7 @@ def text_data_pipeline(text_data):
     return pipeline_prefit.fit(X, y)
 
 
-def test_calibration_pipeline(text_data, text_data_pipeline):
+def test_calibration_dict_pipeline(dict_data, dict_data_pipeline):
     """Test that calibration works in prefit pipeline with transformer
 
     `X` is not array-like, sparse matrix or dataframe at the start.
@@ -543,13 +543,19 @@ def test_calibration_pipeline(text_data, text_data_pipeline):
     Also test it can predict without running into validation errors.
     See https://github.com/scikit-learn/scikit-learn/issues/19637
     """
-    X, y = text_data
-    clf = text_data_pipeline
+    X, y = dict_data
+    clf = dict_data_pipeline
     calib_clf = CalibratedClassifierCV(clf, cv='prefit')
     calib_clf.fit(X, y)
     # Check attributes are obtained from fitted estimator
     assert_array_equal(calib_clf.classes_, clf.classes_)
-    assert _num_features(X) == calib_clf.n_features_in_
+
+    # Neither the pipeline nor the calibration meta-estimator
+    # expose the n_features_in_ check on this kind of data.
+    with pytest.raises(AttributeError):
+        clf.n_features_in_
+    with pytest.raises(AttributeError):
+        calib_clf.n_features_in_
 
     # Ensure that no error is thrown with predict and predict_proba
     calib_clf.predict(X)
@@ -566,6 +572,7 @@ def test_calibration_attributes(clf, cv):
                                n_classes=2, random_state=7)
     if cv == 'prefit':
         clf = clf.fit(X, y)
+
     calib_clf = CalibratedClassifierCV(clf, cv=cv)
     calib_clf.fit(X, y)
 
@@ -577,6 +584,14 @@ def test_calibration_attributes(clf, cv):
         assert_array_equal(calib_clf.classes_, classes)
         assert calib_clf.n_features_in_ == X.shape[1]
 
+    if cv == "prefit":
+        msg = (
+            "Base estimator LinearSVC was prefit on 5 features but "
+            "CalibratedClassifierCV is fit with 10 features."
+        )
+        with pytest.raises(ValueError, match=re.escape(msg)):
+            calib_clf.fit(np.concatenate([X, X], axis=1), y)
+
 
 def test_calibration_inconsistent_prefit_n_features_in():
     # Check that `n_features_in_` from prefit base estimator

From 446aff17f1af3aeab62d7fec02e91e224a6097e6 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 18:14:56 +0100
Subject: [PATCH 04/15] Useless import

---
 sklearn/tests/test_calibration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 281ff240367aa..4ebe8e4f7eae0 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -15,7 +15,6 @@
                                     assert_almost_equal,
                                     assert_array_equal,
                                     ignore_warnings)
-from sklearn.utils.validation import _num_features
 from sklearn.utils.extmath import softmax
 from sklearn.exceptions import NotFittedError
 from sklearn.datasets import make_classification, make_blobs

From d4f51187871591ad29f71386eb14556011699bae Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 18:19:47 +0100
Subject: [PATCH 05/15] Remove stray blank line in test_calibration_attributes

---
 sklearn/tests/test_calibration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 4ebe8e4f7eae0..e25ea3ded4a60 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -571,7 +571,6 @@ def test_calibration_attributes(clf, cv):
                                n_classes=2, random_state=7)
     if cv == 'prefit':
         clf = clf.fit(X, y)
-
     calib_clf = CalibratedClassifierCV(clf, cv=cv)
     calib_clf.fit(X, y)
 

From 8ccd12c180a135e3ec26c0bd32e31e0083d53f9b Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 18:24:23 +0100
Subject: [PATCH 06/15] Remove redundant prefit feature-count check from test

---
 sklearn/tests/test_calibration.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index e25ea3ded4a60..265cb0a6ca7d1 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -582,14 +582,6 @@ def test_calibration_attributes(clf, cv):
         assert_array_equal(calib_clf.classes_, classes)
         assert calib_clf.n_features_in_ == X.shape[1]
 
-    if cv == "prefit":
-        msg = (
-            "Base estimator LinearSVC was prefit on 5 features but "
-            "CalibratedClassifierCV is fit with 10 features."
-        )
-        with pytest.raises(ValueError, match=re.escape(msg)):
-            calib_clf.fit(np.concatenate([X, X], axis=1), y)
-
 
 def test_calibration_inconsistent_prefit_n_features_in():
     # Check that `n_features_in_` from prefit base estimator

From 057a1f9b447d47840c1cc6ccff5580e4e3ae8dde Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 17 Mar 2021 19:27:41 +0100
Subject: [PATCH 07/15] Update check_complex_data to generate data that is
 valid for stratified 3-fold

---
 sklearn/utils/estimator_checks.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7561c64abe6a8..71f5b3b42de42 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -959,10 +959,13 @@ def check_dtype_object(name, estimator_orig):
 
 
 def check_complex_data(name, estimator_orig):
+    rng = np.random.RandomState(42)
     # check that estimators raise an exception on providing complex data
-    X = np.random.sample(10) + 1j * np.random.sample(10)
+    X = rng.uniform(size=10) + 1j * rng.uniform(size=10)
     X = X.reshape(-1, 1)
-    y = np.random.sample(10) + 1j * np.random.sample(10)
+
+    # Something both valid for classification and regression
+    y = rng.randint(low=0, high=2, size=10) + 1j
     estimator = clone(estimator_orig)
     with raises(ValueError, match="Complex data not supported"):
         estimator.fit(X, y)

From af0f3a90ba7bc04a3701234ad20038d4528812b6 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:08:47 +0200
Subject: [PATCH 08/15] Delegate n_features_in_ check even when prefit

---
 sklearn/calibration.py            | 23 ++++-------------------
 sklearn/tests/test_calibration.py |  5 +----
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 8ae9397466543..9cf65b730682c 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -258,26 +258,11 @@ def fit(self, X, y, sample_weight=None):
 
         self.calibrated_classifiers_ = []
         if self.cv == "prefit":
-            # `classes_` and `n_features_in_` should be consistent with that
-            # of base_estimator
+            # `classes_` should be consistent with that of base_estimator
             if isinstance(self.base_estimator, Pipeline):
                 check_is_fitted(self.base_estimator[-1])
             else:
                 check_is_fitted(self.base_estimator)
-            with suppress(AttributeError):
-                # Only perform the check and set self.n_features_in_ if the
-                # base estimator has the attribute defined.
-                expected_n_features_in = base_estimator.n_features_in_
-                actual_n_features_in = _num_features(X)
-                if expected_n_features_in != actual_n_features_in:
-                    raise ValueError(
-                        f"Base estimator {base_estimator.__class__.__name__} "
-                        f"was prefit on {expected_n_features_in} "
-                        f"features but CalibratedClassifierCV is fit "
-                        f"with {actual_n_features_in} features."
-                    )
-                else:
-                    self.n_features_in_ = actual_n_features_in
             self.classes_ = self.base_estimator.classes_
 
             pred_method = _get_prediction_method(base_estimator)
@@ -350,9 +335,9 @@ def fit(self, X, y, sample_weight=None):
                 )
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
-            first_clf = next(iter(self.calibrated_classifiers_)).base_estimator
-            if hasattr(first_clf, "n_features_in_"):
-                self.n_features_in_ = first_clf.n_features_in_
+        first_clf = next(iter(self.calibrated_classifiers_)).base_estimator
+        if hasattr(first_clf, "n_features_in_"):
+            self.n_features_in_ = first_clf.n_features_in_
         return self
 
     def predict_proba(self, X):
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 265cb0a6ca7d1..9bab9a678a19e 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -591,10 +591,7 @@ def test_calibration_inconsistent_prefit_n_features_in():
     clf = LinearSVC(C=1).fit(X, y)
     calib_clf = CalibratedClassifierCV(clf, cv='prefit')
 
-    msg = re.escape(
-        "Base estimator LinearSVC was prefit on 5 features "
-        "but CalibratedClassifierCV is fit with 3 features."
-    )
+    msg = "X has 3 features, but LinearSVC is expecting 5 features as input."
     with pytest.raises(ValueError, match=msg):
         calib_clf.fit(X[:, :3], y)
 

From 1425db0c2afb9dec3ebd4db254353de9c442cc18 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:10:38 +0200
Subject: [PATCH 09/15] More intuitive check

---
 sklearn/tests/test_calibration.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 9bab9a678a19e..0b16fb6a16c90 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -551,10 +551,8 @@ def test_calibration_dict_pipeline(dict_data, dict_data_pipeline):
 
     # Neither the pipeline nor the calibration meta-estimator
     # expose the n_features_in_ check on this kind of data.
-    with pytest.raises(AttributeError):
-        clf.n_features_in_
-    with pytest.raises(AttributeError):
-        calib_clf.n_features_in_
+    assert not hasattr(clf, 'n_features_in_')
+    assert not hasattr(calib_clf, 'n_features_in_')
 
     # Ensure that no error is thrown with predict and predict_proba
     calib_clf.predict(X)

From aab7e7f32269bf04ef3a4446de388c0d406726f7 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:12:00 +0200
Subject: [PATCH 10/15] More intuitive first classifier lookup

---
 sklearn/calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 9cf65b730682c..18c615281e144 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -335,7 +335,7 @@ def fit(self, X, y, sample_weight=None):
                 )
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
-        first_clf = next(iter(self.calibrated_classifiers_)).base_estimator
+        first_clf = self.calibrated_classifiers_[0].base_estimator
         if hasattr(first_clf, "n_features_in_"):
             self.n_features_in_ = first_clf.n_features_in_
         return self

From cb1548e01a90ec70f745d2c98f33a86b290efae7 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:14:08 +0200
Subject: [PATCH 11/15] Remove unused imports

---
 sklearn/calibration.py            | 2 --
 sklearn/tests/test_calibration.py | 1 -
 2 files changed, 3 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 18c615281e144..4e4a9116eff90 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -9,7 +9,6 @@
 
 import warnings
 from inspect import signature
-from contextlib import suppress
 from functools import partial
 
 from math import log
@@ -33,7 +32,6 @@
 from .utils.fixes import delayed
 from .utils.validation import check_is_fitted, check_consistent_length
 from .utils.validation import _check_sample_weight, _num_samples
-from .utils.validation import _num_features
 from .utils import _safe_indexing
 from .pipeline import Pipeline
 from .isotonic import IsotonicRegression
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 0b16fb6a16c90..53d620b41031c 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -2,7 +2,6 @@
 # License: BSD 3 clause
 
 import pytest
-import re
 import numpy as np
 from numpy.testing import assert_allclose
 from scipy import sparse

From c52d2b7724cd6e943427ecb03ffe89f3f743b07e Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:33:49 +0200
Subject: [PATCH 12/15] Simplify classes_ check on prefit pipeline

---
 sklearn/calibration.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 4e4a9116eff90..79d7f4b0961d0 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -257,10 +257,7 @@ def fit(self, X, y, sample_weight=None):
         self.calibrated_classifiers_ = []
         if self.cv == "prefit":
             # `classes_` should be consistent with that of base_estimator
-            if isinstance(self.base_estimator, Pipeline):
-                check_is_fitted(self.base_estimator[-1])
-            else:
-                check_is_fitted(self.base_estimator)
+            check_is_fitted(self.base_estimator, attributes=["classes_"])
             self.classes_ = self.base_estimator.classes_
 
             pred_method = _get_prediction_method(base_estimator)

From d85441ae9b3dec2ce9cbd2870efaae006e3bda7f Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 16:37:34 +0200
Subject: [PATCH 13/15] One more useless import

---
 sklearn/calibration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 79d7f4b0961d0..da0c34d97dc01 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -33,7 +33,6 @@
 from .utils.validation import check_is_fitted, check_consistent_length
 from .utils.validation import _check_sample_weight, _num_samples
 from .utils import _safe_indexing
-from .pipeline import Pipeline
 from .isotonic import IsotonicRegression
 from .svm import LinearSVC
 from .model_selection import check_cv, cross_val_predict

From e166e0e1c1cc51c6f31c5f10b31b49fd5fa55a64 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Mon, 29 Mar 2021 18:28:03 +0200
Subject: [PATCH 14/15] style in indexing sample_weight

---
 sklearn/calibration.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index da0c34d97dc01..e32788a16b399 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -430,9 +430,15 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,
     """
     X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train)
     X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test)
-    if sample_weight is not None and supports_sw:
-        estimator.fit(X_train, y_train,
-                      sample_weight=_safe_indexing(sample_weight, train))
+    if sample_weight is not None:
+        sw_train = _safe_indexing(sample_weight, train)
+        sw_test = _safe_indexing(sample_weight, test)
+    else:
+        sw_train = None
+        sw_test = None
+
+    if supports_sw:
+        estimator.fit(X_train, y_train, sample_weight=sw_train)
     else:
         estimator.fit(X_train, y_train)
 
@@ -440,9 +446,8 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,
     pred_method = _get_prediction_method(estimator)
     predictions = _compute_predictions(pred_method, X_test, n_classes)
 
-    sw = None if sample_weight is None else _safe_indexing(sample_weight, test)
     calibrated_classifier = _fit_calibrator(
-        estimator, predictions, y_test, classes, method, sample_weight=sw
+        estimator, predictions, y_test, classes, method, sample_weight=sw_test
     )
     return calibrated_classifier
 

From 1ff64be6efc22d48abebf0c1020655a5a0e50a33 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Tue, 30 Mar 2021 17:46:23 +0200
Subject: [PATCH 15/15] Only index sample_weight when estimator supports it

Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
---
 sklearn/calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index e32788a16b399..c6289d1df2936 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -430,7 +430,7 @@ def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw,
     """
     X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train)
     X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test)
-    if sample_weight is not None:
+    if supports_sw and sample_weight is not None:
         sw_train = _safe_indexing(sample_weight, train)
         sw_test = _safe_indexing(sample_weight, test)
     else: