From 5b248deda088f73a81b9a3a2befe9f32e07026c1 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 24 Jun 2020 17:08:11 +0200 Subject: [PATCH 01/44] wip --- sklearn/calibration.py | 113 ++++++++++++++++++++++++++++-- sklearn/tests/test_calibration.py | 9 +++ 2 files changed, 118 insertions(+), 4 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index cd9bbd41ead17..0a33911270129 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -87,6 +87,19 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. + ensemble : bool, default=False + Determines how the calibrator is fit, if ``cv`` is not ``'prefit'``. + + If ``True``, the ``base_estimator`` is fit and calibrated on each + ``cv`` fold. The final estimator is an ensemble that outputs the + average predicted probabilities of all such estimators. + + If ``False`, ``cv`` is used to compute predictions + Note this method is implemented when ``probabilities=True`` for + :mod:`sklearn.svm` estimators. + + .. versionadded:: 0.24 + Attributes ---------- classes_ : array, shape (n_classes) @@ -152,10 +165,12 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, A. Niculescu-Mizil & R. Caruana, ICML 2005 """ @_deprecate_positional_args - def __init__(self, base_estimator=None, *, method='sigmoid', cv=None): + def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, + ensemble=False): self.base_estimator = base_estimator self.method = method self.cv = cv + self.ensemble = ensemble def fit(self, X, y, sample_weight=None): """Fit the calibrated model @@ -179,7 +194,7 @@ def fit(self, X, y, sample_weight=None): X, y = self._validate_data(X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False, allow_nd=True) X, y = indexable(X, y) - le = LabelBinarizer().fit(y) + le = LabelEncoder().fit(y) self.classes_ = le.classes_ # Check that each cross-validation fold can have at least one @@ -351,6 +366,8 @@ def _preproc(self, X): df = df[:, np.newaxis] elif hasattr(self.base_estimator, "predict_proba"): df = self.base_estimator.predict_proba(X) + print(f'df {df[:5,:]}') + print(f'base est clases {self.base_estimator.classes_}') if n_classes == 2: df = df[:, 1:] else: @@ -389,12 +406,17 @@ def fit(self, X, y, sample_weight=None): self.label_encoder_.fit(self.classes) self.classes_ = self.label_encoder_.classes_ + print(f'classes at _cal fit {self.classes_}') Y = label_binarize(y, classes=self.classes_) + # for bin y, this is 1d + print(f'shape of Y {Y.shape}') df, idx_pos_class = self._preproc(X) self.calibrators_ = [] - + print(f'idx_pos_class {idx_pos_class}') + print(f'df {df[:5,:]}') for k, this_df in zip(idx_pos_class, df.T): + print(f'k {k}') if self.method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') elif self.method == 'sigmoid': @@ -427,7 +449,8 @@ def predict_proba(self, X): proba = np.zeros((X.shape[0], n_classes)) df, idx_pos_class = self._preproc(X) - + print(f'predict proba df shape {df.shape}') + print(f'num of calibrators {len(self.calibrators_)}') for k, this_df, calibrator in \ zip(idx_pos_class, df.T, self.calibrators_): if n_classes == 2: @@ -449,6 +472,88 @@ def predict_proba(self, X): return proba +def _preproc(base_estimator, classes, label_encoder, X): + n_classes = len(classes) + if hasattr(base_estimator, "decision_function"): + df = base_estimator.decision_function(X) + if df.ndim == 1: + df = df[:, np.newaxis] + elif hasattr(base_estimator, 
"predict_proba"): + df = base_estimator.predict_proba(X) + if n_classes == 2: + df = df[:, 1:] + else: + raise RuntimeError('classifier has no decision_function or ' + 'predict_proba method.') + + idx_pos_class = label_encoder_.\ + transform(base_estimator.classes_) + + return df, idx_pos_class + + +def _fit_calibrator(fitted_classifier, method, y, X=None, sample_weight=None): + """Fit calibrator and return a `_CalibratedClassiferPipeline` instance. + + Parameters + ---------- + fitted_classifier : instance BaseEstimator + Fitted classifier. + + method : {'sigmoid', 'isotonic'} + The method to use for calibration. + + y : ndarray, shape (n_samples,) + The targets. + + X : array-like, shape (n_samples, n_features) + Training data. + + sample_weight : ndarray, shape (n_samples,), default=None + Sample weights. If ``None``, then samples are equally weighted. + + Returns + ------- + ``_CalibratedClassiferPipeline`` instance. + """ + label_encoder_ = LabelEncoder() + if classes is None: + label_encoder_.fit(y) + else: + label_encoder_.fit(classes) + + classes = label_encoder_.classes_ + + if method == 'isotonic': + calibrator = IsotonicRegression(out_of_bounds='clip') + elif method == 'sigmoid': + calibrator = _SigmoidCalibration() + + calibrator.fit(df, y) + + return _CalibratedClassiferPipeline(fitted_classifier, calibrator) + + +class _CalibratedClassiferPipeline: + """Pipeline chaining a classifier and it's calibrator.""" + + def __init__(self, clf_fitted, calibrator_fitted): + self.clf_fitted = clf_fitted + self.calibrator_fitted = calibrator_fitted + + def predict_proba(self, X): + n_classes = len(self.classes_) + if hasattr(self.clf_fitted, "decision_function"): + df = self.clf_fitted.decision_function(X) + if df.ndim == 1: + df = df[:, np.newaxis] + elif hasattr(clf_fitted, "predict_proba"): + df = clf_fitted.predict_proba(X) + if n_classes == 2: + df = df[:, 1:] + return self.calibrator_fitted.predict_proba(df) + + def _sigmoid_calibration(df, y, sample_weight=None): """Probability Calibration with sigmoid method (Platt 2000) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index f131eab4c1680..e5636d5f39f79 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -341,3 +341,12 @@ def decision_function(self, X): calibrated_clf = CalibratedClassifierCV(MockTensorClassifier()) # we should be able to fit this classifier with no error calibrated_clf.fit(X, y) + + +def test_ll(): + n_samples = 100 + X, y = make_classification(n_samples=n_samples, n_features=6, + n_classes=3, n_informative=3, random_state=42) + clf = CalibratedClassifierCV(RandomForestClassifier(), cv=2) + clf.fit(X,y) + clf.predict_proba(X) From ccf55fb71977394fa0816678a7c06696afc6e7d6 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 25 Jun 2020 11:14:06 +0200 Subject: [PATCH 02/44] refactor --- sklearn/calibration.py | 351 ++++++++++++++++------------------------- 1 file changed, 135 insertions(+), 216 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0a33911270129..18873cc45b08b 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -87,19 +87,6 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. - ensemble : bool, default=False - Determines how the calibrator is fit, if ``cv`` is not ``'prefit'``. - - If ``True``, the ``base_estimator`` is fit and calibrated on each - ``cv`` fold. 
The final estimator is an ensemble that outputs the - average predicted probabilities of all such estimators. - - If ``False`, ``cv`` is used to compute predictions - Note this method is implemented when ``probabilities=True`` for - :mod:`sklearn.svm` estimators. - - .. versionadded:: 0.24 - Attributes ---------- classes_ : array, shape (n_classes) @@ -191,11 +178,14 @@ def fit(self, X, y, sample_weight=None): self : object Returns an instance of self. """ + supported_methods = ('sigmoid', 'isotonic') + if self.method not in (supported_methods): + raise ValueError(f"'method' should be one of: {supported_methods}." + f" Got {self.method}.") + X, y = self._validate_data(X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False, allow_nd=True) X, y = indexable(X, y) - le = LabelEncoder().fit(y) - self.classes_ = le.classes_ # Check that each cross-validation fold can have at least one # example per class @@ -217,11 +207,20 @@ def fit(self, X, y, sample_weight=None): base_estimator = self.base_estimator if self.cv == "prefit": - calibrated_classifier = _CalibratedClassifier( - base_estimator, method=self.method) - calibrated_classifier.fit(X, y, sample_weight) + # Class attributes should be consistent with that of base_estimator + self.classes_ = base_estimator.classes_ + self.label_encoder_ = LabelEncoder().fit(base_estimator.classes_) + + calibrated_classifier = _fit_calibrator( + base_estimator, self.method, sample_weight) self.calibrated_classifiers_.append(calibrated_classifier) + else: + # Class attributes set using all `y` + le = LabelEncoder().fit(y) + self.classes_ = le.classes_ + self.label_encoder_ = le + cv = check_cv(self.cv, y, classifier=True) fit_parameters = signature(base_estimator.fit).parameters base_estimator_supports_sw = "sample_weight" in fit_parameters @@ -244,10 +243,11 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X[train], y[train]) - calibrated_classifier = _CalibratedClassifier( - this_estimator, method=self.method, classes=self.classes_) sw = None if sample_weight is None else sample_weight[test] - calibrated_classifier.fit(X[test], y[test], sample_weight=sw) + calibrated_classifier = _fit_calibrator( + this_estimator, X[test], y[test], self.label_encoder_, + self.method, sw + ) self.calibrated_classifiers_.append(calibrated_classifier) return self @@ -309,249 +309,168 @@ def _more_tags(self): } -class _CalibratedClassifier: - """Probability calibration with isotonic regression or sigmoid. +def _get_predictions(clf_fitted, X, label_encoder_): + """Returns predictions for `X` and index of classes present in `X`. - It assumes that base_estimator has already been fit, and trains the - calibration on the input set of the fit function. Note that this class - should not be used as an estimator directly. Use CalibratedClassifierCV - with cv="prefit" instead. + For predicitons, `decision_function` method of the `clf_fitted` is used. + If this does not exist, `predict_proba` method used. Parameters ---------- - base_estimator : instance BaseEstimator - The classifier whose output decision function needs to be calibrated - to offer more accurate predict_proba outputs. No default value since - it has to be an already fitted estimator. + clf_fitted : Estimator instance + Fitted classifier instance. - method : 'sigmoid' | 'isotonic' - The method to use for calibration. Can be 'sigmoid' which - corresponds to Platt's method or 'isotonic' which is a - non-parametric approach based on isotonic regression. 
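The two methods named in this docstring are worth pinning down before the old class is dissolved. A minimal sketch with public scikit-learn pieces on toy data: `IsotonicRegression(out_of_bounds='clip')` is exactly the non-parametric calibrator used throughout this series, while fitting `LogisticRegression` on the one-dimensional scores only approximates Platt's method (the private `_SigmoidCalibration` in these patches additionally smooths the 0/1 targets with Bayesian priors).

from sklearn.datasets import make_classification
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
clf = LinearSVC(random_state=0).fit(X[:300], y[:300])

# Uncalibrated margins on held-out data; both calibrators map these to [0, 1].
scores = clf.decision_function(X[300:])

# Sigmoid (Platt-style): a one-feature logistic fit on the scores.
sigmoid = LogisticRegression().fit(scores.reshape(-1, 1), y[300:])
proba_sigmoid = sigmoid.predict_proba(scores.reshape(-1, 1))[:, 1]

# Isotonic: a monotone step function fitted to the same scores.
isotonic = IsotonicRegression(out_of_bounds='clip').fit(scores, y[300:])
proba_isotonic = isotonic.predict(scores)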
- - classes : array-like, shape (n_classes,), optional - Contains unique classes used to fit the base estimator. - if None, then classes is extracted from the given target values - in fit(). - - See also - -------- - CalibratedClassifierCV + X : array-like + Sample data used for the predictions. - References - ---------- - .. [1] Obtaining calibrated probability estimates from decision trees - and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 + label_encoder_ : LabelEncoder instance + LabelEncoder instance fitted on all the targets. - .. [2] Transforming Classifier Scores into Accurate Multiclass - Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) - - .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to - Regularized Likelihood Methods, J. Platt, (1999) + Returns + ------- + df : array-like, shape (X.shape[0], len(clf_fitted.classes_)) + The predictions. Note array is of shape (X.shape[0], 1) when there are + 2 classes. - .. [4] Predicting Good Probabilities with Supervised Learning, - A. Niculescu-Mizil & R. Caruana, ICML 2005 + idx_pos_class : array-like, shape (n_classes,) + Indices of the classes present in `X`. """ - @_deprecate_positional_args - def __init__(self, base_estimator, *, method='sigmoid', classes=None): - self.base_estimator = base_estimator - self.method = method - self.classes = classes - - def _preproc(self, X): - n_classes = len(self.classes_) - if hasattr(self.base_estimator, "decision_function"): - df = self.base_estimator.decision_function(X) - if df.ndim == 1: - df = df[:, np.newaxis] - elif hasattr(self.base_estimator, "predict_proba"): - df = self.base_estimator.predict_proba(X) - print(f'df {df[:5,:]}') - print(f'base est clases {self.base_estimator.classes_}') - if n_classes == 2: - df = df[:, 1:] - else: - raise RuntimeError('classifier has no decision_function or ' - 'predict_proba method.') - - idx_pos_class = self.label_encoder_.\ - transform(self.base_estimator.classes_) - - return df, idx_pos_class - - def fit(self, X, y, sample_weight=None): - """Calibrate the fitted model - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - Training data. - - y : array-like, shape (n_samples,) - Target values. - - sample_weight : array-like of shape (n_samples,), default=None - Sample weights. If None, then samples are equally weighted. - - Returns - ------- - self : object - Returns an instance of self. - """ - - self.label_encoder_ = LabelEncoder() - if self.classes is None: - self.label_encoder_.fit(y) - else: - self.label_encoder_.fit(self.classes) - - self.classes_ = self.label_encoder_.classes_ - print(f'classes at _cal fit {self.classes_}') - Y = label_binarize(y, classes=self.classes_) - # for bin y, this is 1d - print(f'shape of Y {Y.shape}') - - df, idx_pos_class = self._preproc(X) - self.calibrators_ = [] - print(f'idx_pos_class {idx_pos_class}') - print(f'df {df[:5,:]}') - for k, this_df in zip(idx_pos_class, df.T): - print(f'k {k}') - if self.method == 'isotonic': - calibrator = IsotonicRegression(out_of_bounds='clip') - elif self.method == 'sigmoid': - calibrator = _SigmoidCalibration() - else: - raise ValueError('method should be "sigmoid" or ' - '"isotonic". Got %s.' 
% self.method) - calibrator.fit(this_df, Y[:, k], sample_weight) - self.calibrators_.append(calibrator) - - return self - - def predict_proba(self, X): - """Posterior probabilities of classification - - This function returns posterior probabilities of classification - according to each class on an array of test vectors X. - - Parameters - ---------- - X : array-like, shape (n_samples, n_features) - The samples. - - Returns - ------- - C : array, shape (n_samples, n_classes) - The predicted probas. Can be exact zeros. - """ - n_classes = len(self.classes_) - proba = np.zeros((X.shape[0], n_classes)) - - df, idx_pos_class = self._preproc(X) - print(f'predict proba df shape {df.shape}') - print(f'num of calibrators {len(self.calibrators_)}') - for k, this_df, calibrator in \ - zip(idx_pos_class, df.T, self.calibrators_): - if n_classes == 2: - k += 1 - proba[:, k] = calibrator.predict(this_df) - - # Normalize the probabilities - if n_classes == 2: - proba[:, 0] = 1. - proba[:, 1] - else: - proba /= np.sum(proba, axis=1)[:, np.newaxis] - - # XXX : for some reason all probas can be 0 - proba[np.isnan(proba)] = 1. / n_classes - - # Deal with cases where the predicted probability minimally exceeds 1.0 - proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 - - return proba - - -def _preproc(base_estimator, classes, label_encoder, X): - n_classes = len(classes) - if hasattr(base_estimator, "decision_function"): - df = base_estimator.decision_function(X) + if hasattr(clf_fitted, "decision_function"): + df = clf_fitted.decision_function(X) if df.ndim == 1: df = df[:, np.newaxis] - elif hasattr(base_estimator, "predict_proba"): - df = base_estimator.predict_proba(X) - if n_classes == 2: + elif hasattr(clf_fitted, "predict_proba"): + df = clf_fitted.predict_proba(X) + if len(label_encoder_.classes_) == 2: df = df[:, 1:] else: - raise RuntimeError('classifier has no decision_function or ' - 'predict_proba method.') + raise RuntimeError("'base_estimator' has no 'decision_function' or " + "'predict_proba' method.") - idx_pos_class = label_encoder_.\ - transform(base_estimator.classes_) + idx_pos_class = label_encoder_.transform(clf_fitted.classes_) return df, idx_pos_class -def _fit_calibrator(fitted_classifier, method, y, X=None, sample_weight=None): - """Fit calibrator and return a `_CalibratedClassiferPipeline` instance. +def _fit_calibrator(clf_fitted, X, y, label_encoder_, method, + sample_weight=None): + """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` + instance. + + Output of the `decision_function` method of the `clf_fitted` is used for + calibration. If this method does not exist for `clf_fitted`, + `predict_proba` method used. Parameters ---------- - fitted_classifier : instance BaseEstimator + clf_fitted : Estimator instance Fitted classifier. - method : {'sigmoid', 'isotonic'} - The method to use for calibration. + X : array-like + Sample data used to calibrate predictions. y : ndarray, shape (n_samples,) The targets. - X : array-like, shape (n_samples, n_features) - Training data. + label_encoder_ : LabelEncoder instance + LabelEncoder instance fitted on all the targets. + + method : {'sigmoid', 'isotonic'} + The method to use for calibration. sample_weight : ndarray, shape (n_samples,), default=None - Sample weights. If ``None``, then samples are equally weighted. + Sample weights. If `None`, then samples are equally weighted. Returns ------- - ``_CalibratedClassiferPipeline`` instance. 
+ pipeline : _CalibratedClassiferPipeline instance """ - label_encoder_ = LabelEncoder() - if classes is None: - label_encoder_.fit(y) - else: - label_encoder_.fit(classes) - - classes = label_encoder_.classes_ + Y = label_binarize(y, classes=label_encoder_.classes_) + df, idx_pos_class = _get_predictions(clf_fitted, X, label_encoder_) if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') elif method == 'sigmoid': calibrator = _SigmoidCalibration() - calibrator.fit(df, y) + calibrated_classifiers = [] + for idx, this_df in zip(idx_pos_class, df.T): + if self.method == 'isotonic': + calibrator = IsotonicRegression(out_of_bounds='clip') + elif self.method == 'sigmoid': + calibrator = _SigmoidCalibration() + + calibrator.fit(this_df, Y[:, indx], sample_weight) + calibrated_classifiers.append(calibrator) - return _CalibratedClassiferPipeline(fitted_classifier, calibrator) + pipeline = _CalibratedClassiferPipeline( + clf_fitted, calibrated_classifiers, label_encoder_ + ) + return pipeline class _CalibratedClassiferPipeline: - """Pipeline chaining a classifier and it's calibrator.""" + """Pipeline chaining a fitted classifier and it's fitted calibrators. + + Parameters + ---------- + clf_fitted : Estimator instance + Fitted classifier. - def __init__(self, clf_fitted, calibrator_fitted): + calibrators_fitted : List of fitted estimator instances + List of fitted calibrators (either 'IsotonicRegression' or + '_SigmoidCalibration'). Number of calibrators equals the number of + classes. However, if there are 2 classes, list contains only one + fitted calibrator. + """ + def __init__(self, clf_fitted, calibrators_fitted, label_encoder_): self.clf_fitted = clf_fitted - self.calibrator_fitted = calibrator_fitted + self.calibrators_fitted = calibrator_fitted + self.label_encoder_ = label_encoder_ def predict_proba(self, X): - n_classes = len(self.classes_) - if hasattr(self.clf_fitted, "decision_function"): - df = self.clf_fitted.decision_function(X) - if df.ndim == 1: - df = df[:, np.newaxis] - elif hasattr(clf_fitted, "predict_proba"): - df = clf_fitted.predict_proba(X) + """Calculate posterior (calibrated) probabilities. + + Calculates classification posterior (calibrated) probabilities + for each class, in a one-vs-all manner, for `X`. + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The sample data. + + Returns + ------- + proba : array, shape (n_samples, n_classes) + The predicted probabilities. Can be exact zeros. + """ + n_classes = len(self.label_encoder_.classes_) + df, idx_pos_class = _get_predictions( + self.clf_fitted, X, self.label_encoder_ + ) + + proba = np.zeros((X.shape[0], n_classes)) + for idx, this_df, calibrator in \ + zip(idx_pos_class, df.T, self.calibrators_fitted): if n_classes == 2: - df = df[:, 1:] - return self.calibrator_fitted.predict_proba(df) + # When binary, proba of self.label_encoder_.classes_[1] + # calculated but `idx_pos_class` = 0 + idx += 1 + proba[:, idx] = calibrator.predict(this_df) + + # Normalize the probabilities + if n_classes == 2: + proba[:, 0] = 1. - proba[:, 1] + else: + proba /= np.sum(proba, axis=1)[:, np.newaxis] + + # XXX : for some reason all probas can be 0 + proba[np.isnan(proba)] = 1. 
/ n_classes + + # Deal with cases where the predicted probability minimally exceeds 1.0 + proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 + + return proba def _sigmoid_calibration(df, y, sample_weight=None): From 2b87d0bbf0b717e94e103145cf1c02d97235a5f4 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 10:55:52 +0200 Subject: [PATCH 03/44] clean up, tests pass --- sklearn/calibration.py | 41 +++++++++++++++---------------- sklearn/tests/test_calibration.py | 2 +- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 902c7fb337ee9..076bb488454ea 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -154,12 +154,10 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, A. Niculescu-Mizil & R. Caruana, ICML 2005 """ @_deprecate_positional_args - def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, - ensemble=False): + def __init__(self, base_estimator=None, *, method='sigmoid', cv=None): self.base_estimator = base_estimator self.method = method self.cv = cv - self.ensemble = ensemble def fit(self, X, y, sample_weight=None): """Fit the calibrated model @@ -184,7 +182,6 @@ def fit(self, X, y, sample_weight=None): if self.method not in (supported_methods): raise ValueError(f"'method' should be one of: {supported_methods}." f" Got {self.method}.") - X, y = indexable(X, y) self.calibrated_classifiers_ = [] @@ -205,16 +202,19 @@ def fit(self, X, y, sample_weight=None): with suppress(AttributeError): self.n_features_in_ = base_estimator.n_features_in_ self.classes_ = self.base_estimator.classes_ + self.label_encoder_ = LabelEncoder().fit(self.classes_) calibrated_classifier = _fit_calibrator( - base_estimator, self.method, sample_weight) + base_estimator, self.label_encoder_, self.method, X, y, + sample_weight + ) self.calibrated_classifiers_.append(calibrated_classifier) else: X, y = self._validate_data( X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False, allow_nd=True ) - # Class attributes set using all `y` + # Set attributes using all `y` le = LabelEncoder().fit(y) self.classes_ = le.classes_ self.label_encoder_ = le @@ -257,8 +257,8 @@ def fit(self, X, y, sample_weight=None): sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( - this_estimator, X[test], y[test], self.label_encoder_, - self.method, sw + this_estimator, self.label_encoder_, self.method, + X[test], y[test], sw ) self.calibrated_classifiers_.append(calibrated_classifier) @@ -364,32 +364,31 @@ def _get_predictions(clf_fitted, X, label_encoder_): return df, idx_pos_class -def _fit_calibrator(clf_fitted, X, y, label_encoder_, method, +def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, sample_weight=None): """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` instance. Output of the `decision_function` method of the `clf_fitted` is used for - calibration. If this method does not exist for `clf_fitted`, - `predict_proba` method used. + calibration. If this method does not exist, `predict_proba` method used. Parameters ---------- clf_fitted : Estimator instance Fitted classifier. - X : array-like - Sample data used to calibrate predictions. - - y : ndarray, shape (n_samples,) - The targets. - label_encoder_ : LabelEncoder instance LabelEncoder instance fitted on all the targets. method : {'sigmoid', 'isotonic'} The method to use for calibration. + X : array-like + Sample data used to calibrate predictions. 
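The per-class loop and the renormalization in the hunks above implement a one-vs-rest reduction: the targets are binarized, one calibrator is fitted per prediction column, and the calibrated columns are rescaled to sum to one. A self-contained sketch of that reduction on toy multiclass data (isotonic only; the NaN guard mirrors the `proba[np.isnan(proba)]` line in the patch):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.isotonic import IsotonicRegression
from sklearn.preprocessing import label_binarize

X, y = make_classification(n_samples=600, n_classes=3, n_informative=6,
                           random_state=0)
clf = RandomForestClassifier(random_state=0).fit(X[:400], y[:400])

preds = clf.predict_proba(X[400:])                 # shape (n_samples, 3)
Y = label_binarize(y[400:], classes=clf.classes_)  # one 0/1 column per class

# One calibrator per class, fitted on that class's prediction column.
calibrators = [IsotonicRegression(out_of_bounds='clip').fit(preds[:, k],
                                                            Y[:, k])
               for k in range(len(clf.classes_))]

proba = np.column_stack([c.predict(preds[:, k])
                         for k, c in enumerate(calibrators)])
proba /= proba.sum(axis=1, keepdims=True)  # renormalize across classes
proba[np.isnan(proba)] = 1.0 / 3           # rows where every calibrator gave 0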
+ + y : ndarray, shape (n_samples,) + The targets. + sample_weight : ndarray, shape (n_samples,), default=None Sample weights. If `None`, then samples are equally weighted. @@ -407,12 +406,12 @@ def _fit_calibrator(clf_fitted, X, y, label_encoder_, method, calibrated_classifiers = [] for idx, this_df in zip(idx_pos_class, df.T): - if self.method == 'isotonic': + if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') - elif self.method == 'sigmoid': + elif method == 'sigmoid': calibrator = _SigmoidCalibration() - calibrator.fit(this_df, Y[:, indx], sample_weight) + calibrator.fit(this_df, Y[:, idx], sample_weight) calibrated_classifiers.append(calibrator) pipeline = _CalibratedClassiferPipeline( @@ -437,7 +436,7 @@ class _CalibratedClassiferPipeline: """ def __init__(self, clf_fitted, calibrators_fitted, label_encoder_): self.clf_fitted = clf_fitted - self.calibrators_fitted = calibrator_fitted + self.calibrators_fitted = calibrators_fitted self.label_encoder_ = label_encoder_ def predict_proba(self, X): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 9fd5f811cf90d..da93cd32a874e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -109,7 +109,7 @@ def test_calibration_default_estimator(): calib_clf = CalibratedClassifierCV(cv=2) calib_clf.fit(X, y) - base_est = calib_clf.calibrated_classifiers_[0].base_estimator + base_est = calib_clf.calibrated_classifiers_[0].clf_fitted assert isinstance(base_est, LinearSVC) From 2ee24cdbd9bac4a1dc54d5b509117ff0bcef313c Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 11:25:03 +0200 Subject: [PATCH 04/44] remove debugging test --- sklearn/tests/test_calibration.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index da93cd32a874e..553fcdd6dc225 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -432,12 +432,3 @@ def test_calibration_attributes(clf, cv): classes = LabelBinarizer().fit(y).classes_ assert_array_equal(calib_clf.classes_, classes) assert calib_clf.n_features_in_ == X.shape[1] - - -def test_ll(): - n_samples = 100 - X, y = make_classification(n_samples=n_samples, n_features=6, - n_classes=3, n_informative=3, random_state=42) - clf = CalibratedClassifierCV(RandomForestClassifier(), cv=2) - clf.fit(X,y) - clf.predict_proba(X) From 4ae878bee23260ecedbb939ec9740d9ff433b0a3 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 11:28:06 +0200 Subject: [PATCH 05/44] lint --- sklearn/calibration.py | 3 +-- sklearn/tests/test_calibration.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 076bb488454ea..c175a8e21f9e1 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -17,11 +17,10 @@ from scipy.special import expit from scipy.special import xlogy from scipy.optimize import fmin_bfgs -from .preprocessing import LabelEncoder from .base import (BaseEstimator, ClassifierMixin, RegressorMixin, clone, MetaEstimatorMixin) -from .preprocessing import label_binarize, LabelBinarizer +from .preprocessing import label_binarize, LabelEncoder from .utils import check_array, indexable, column_or_1d from .utils.validation import check_is_fitted, check_consistent_length from .utils.validation import _check_sample_weight diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 553fcdd6dc225..6181f0dd50027 
100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -14,7 +14,7 @@ assert_raises, ignore_warnings) from sklearn.exceptions import NotFittedError from sklearn.datasets import make_classification, make_blobs -from sklearn.preprocessing import LabelBinarizer +from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import KFold from sklearn.naive_bayes import MultinomialNB from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor @@ -429,6 +429,6 @@ def test_calibration_attributes(clf, cv): assert_array_equal(calib_clf.classes_, clf.classes_) assert calib_clf.n_features_in_ == clf.n_features_in_ else: - classes = LabelBinarizer().fit(y).classes_ + classes = LabelEncoder().fit(y).classes_ assert_array_equal(calib_clf.classes_, classes) assert calib_clf.n_features_in_ == X.shape[1] From 12cee4d2838e74f1096f88e56e89eefd307254a2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 15:04:34 +0200 Subject: [PATCH 06/44] suggestions --- sklearn/calibration.py | 55 ++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index c175a8e21f9e1..fc146b96b7025 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -177,10 +177,6 @@ def fit(self, X, y, sample_weight=None): self : object Returns an instance of self. """ - supported_methods = ('sigmoid', 'isotonic') - if self.method not in (supported_methods): - raise ValueError(f"'method' should be one of: {supported_methods}." - f" Got {self.method}.") X, y = indexable(X, y) self.calibrated_classifiers_ = [] @@ -264,9 +260,9 @@ def fit(self, X, y, sample_weight=None): return self def predict_proba(self, X): - """Posterior probabilities of classification + """Calibrated probabilities of classification - This function returns posterior probabilities of classification + This function returns calibrated probabilities of classification according to each class on an array of test vectors X. Parameters @@ -343,7 +339,7 @@ def _get_predictions(clf_fitted, X, label_encoder_): The predictions. Note array is of shape (X.shape[0], 1) when there are 2 classes. - idx_pos_class : array-like, shape (n_classes,) + pos_class_indices : array-like, shape (n_classes,) Indices of the classes present in `X`. """ if hasattr(clf_fitted, "decision_function"): @@ -358,9 +354,9 @@ def _get_predictions(clf_fitted, X, label_encoder_): raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") - idx_pos_class = label_encoder_.transform(clf_fitted.classes_) + pos_class_indices = label_encoder_.transform(clf_fitted.classes_) - return df, idx_pos_class + return df, pos_class_indices def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, @@ -369,7 +365,7 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, instance. Output of the `decision_function` method of the `clf_fitted` is used for - calibration. If this method does not exist, `predict_proba` method used. + calibration. If this method does not exist, `predict_proba` method is used. 
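For orientation, none of these renames change the public entry point: calibration is either cross-validated, fitting a clone of the base estimator per fold, or applied to an already-fitted model with `cv='prefit'`. A usage sketch against the public API, on synthetic data:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=400, random_state=0)

# Cross-validation mode: a clone of the base estimator is fitted per fold.
calibrated = CalibratedClassifierCV(LinearSVC(random_state=0), cv=3,
                                    method='sigmoid').fit(X, y)
proba = calibrated.predict_proba(X)

# Prefit mode: calibrate an already-fitted classifier on held-out data.
clf = LinearSVC(random_state=0).fit(X[:300], y[:300])
prefit = CalibratedClassifierCV(clf, cv='prefit').fit(X[300:], y[300:])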
Parameters ---------- @@ -396,21 +392,18 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, pipeline : _CalibratedClassiferPipeline instance """ Y = label_binarize(y, classes=label_encoder_.classes_) - df, idx_pos_class = _get_predictions(clf_fitted, X, label_encoder_) - - if method == 'isotonic': - calibrator = IsotonicRegression(out_of_bounds='clip') - elif method == 'sigmoid': - calibrator = _SigmoidCalibration() + df, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) calibrated_classifiers = [] - for idx, this_df in zip(idx_pos_class, df.T): + for class_idx, this_df in zip(pos_class_indices, df.T): if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') elif method == 'sigmoid': calibrator = _SigmoidCalibration() - - calibrator.fit(this_df, Y[:, idx], sample_weight) + else: + raise ValueError("'method' should be one of: 'sigmoid' or " + f"'isotonic'. Got {method}.") + calibrator.fit(this_df, Y[:, class_idx], sample_weight) calibrated_classifiers.append(calibrator) pipeline = _CalibratedClassiferPipeline( @@ -420,7 +413,7 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, class _CalibratedClassiferPipeline: - """Pipeline chaining a fitted classifier and it's fitted calibrators. + """Pipeline-like chaining a fitted classifier and its fitted calibrators. Parameters ---------- @@ -429,8 +422,8 @@ class _CalibratedClassiferPipeline: calibrators_fitted : List of fitted estimator instances List of fitted calibrators (either 'IsotonicRegression' or - '_SigmoidCalibration'). Number of calibrators equals the number of - classes. However, if there are 2 classes, list contains only one + '_SigmoidCalibration'). The number of calibrators equals the number of + classes. However, if there are 2 classes, the list contains only one fitted calibrator. """ def __init__(self, clf_fitted, calibrators_fitted, label_encoder_): @@ -439,9 +432,9 @@ def __init__(self, clf_fitted, calibrators_fitted, label_encoder_): self.label_encoder_ = label_encoder_ def predict_proba(self, X): - """Calculate posterior (calibrated) probabilities. + """Calculate calibrated probabilities. - Calculates classification posterior (calibrated) probabilities + Calculates classification calibrated probabilities for each class, in a one-vs-all manner, for `X`. Parameters @@ -455,18 +448,18 @@ def predict_proba(self, X): The predicted probabilities. Can be exact zeros. 
""" n_classes = len(self.label_encoder_.classes_) - df, idx_pos_class = _get_predictions( + df, pos_class_indices = _get_predictions( self.clf_fitted, X, self.label_encoder_ ) proba = np.zeros((X.shape[0], n_classes)) - for idx, this_df, calibrator in \ - zip(idx_pos_class, df.T, self.calibrators_fitted): + for class_idx, this_df, calibrator in \ + zip(pos_class_indices, df.T, self.calibrators_fitted): if n_classes == 2: - # When binary, proba of self.label_encoder_.classes_[1] - # calculated but `idx_pos_class` = 0 - idx += 1 - proba[:, idx] = calibrator.predict(this_df) + # When binary, proba of clf_fitted.classes_[1] + # output but `pos_class_indices` = 0 + class_idx += 1 + proba[:, class_idx] = calibrator.predict(this_df) # Normalize the probabilities if n_classes == 2: From e1831316eef2e89bbac97283da08576e10d964c4 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 16:58:03 +0200 Subject: [PATCH 07/44] check class y --- sklearn/calibration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index fc146b96b7025..deab30c19df9e 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -22,6 +22,7 @@ MetaEstimatorMixin) from .preprocessing import label_binarize, LabelEncoder from .utils import check_array, indexable, column_or_1d +from .utils.multiclass import check_classification_targets from .utils.validation import check_is_fitted, check_consistent_length from .utils.validation import _check_sample_weight from .pipeline import Pipeline @@ -177,6 +178,7 @@ def fit(self, X, y, sample_weight=None): self : object Returns an instance of self. """ + check_classification_targets(y) X, y = indexable(X, y) self.calibrated_classifiers_ = [] From 1b411f7d4240d8074bc4d8e03e9f625b259b76b8 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 1 Jul 2020 17:28:51 +0200 Subject: [PATCH 08/44] add att docstr --- sklearn/calibration.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index deab30c19df9e..b5003661d58c3 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -99,6 +99,14 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, split, which has been fitted on training folds and calibrated on the testing fold. + n_features_in_ : int + The number of features in `X`. If `cv='prefit'`, number of features + in the data used to fit `base_estimator`. + + label_encoder_ : LabelEncoder instance + `LabelEncoder` fitted on `y`. If `cv='prefit'`, `LabelEncoder` + fitted on `base_estimator.classes_`. + Examples -------- >>> from sklearn.datasets import make_classification From b7247606d592d08be7317210f324a743df73fb67 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 7 Jul 2020 17:35:14 +0200 Subject: [PATCH 09/44] wip --- sklearn/calibration.py | 79 +++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index b5003661d58c3..b93506ec4e7be 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -28,7 +28,7 @@ from .pipeline import Pipeline from .isotonic import IsotonicRegression from .svm import LinearSVC -from .model_selection import check_cv +from .model_selection import check_cv, cross_val_predict from .utils.validation import _deprecate_positional_args @@ -162,10 +162,12 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, A. Niculescu-Mizil & R. 
Caruana, ICML 2005 """ @_deprecate_positional_args - def __init__(self, base_estimator=None, *, method='sigmoid', cv=None): + def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, + ensemble=False): self.base_estimator = base_estimator self.method = method self.cv = cv + self.ensemble=ensemble def fit(self, X, y, sample_weight=None): """Fit the calibrated model @@ -189,7 +191,6 @@ def fit(self, X, y, sample_weight=None): check_classification_targets(y) X, y = indexable(X, y) - self.calibrated_classifiers_ = [] if self.base_estimator is None: # we want all classifiers that don't expose a random_state # to be deterministic (and we don't want to expose this one). @@ -197,6 +198,7 @@ def fit(self, X, y, sample_weight=None): else: base_estimator = self.base_estimator + self.calibrated_classifiers_ = [] if self.cv == "prefit": # `classes_` and `n_features_in_` should be consistent with that # of base_estimator @@ -250,23 +252,51 @@ def fit(self, X, y, sample_weight=None): warnings.warn("Since %s does not support sample_weights, " "sample weights will only be used for the " "calibration itself." % estimator_name) + if self.ensemble: + for train, test in cv.split(X, y): + this_estimator = clone(base_estimator) + + if (sample_weight is not None + and base_estimator_supports_sw): + this_estimator.fit(X[train], y[train], + sample_weight=sample_weight[train]) + else: + this_estimator.fit(X[train], y[train]) + + sw = None if sample_weight is None else sample_weight[test] + calibrated_classifier = _fit_calibrator( + this_estimator, self.label_encoder_, self.method, + X[test], y[test], sw + ) + self.calibrated_classifiers_.append(calibrated_classifier) + else: + if hasattr(base_estimator, "decision_function"): + base_estimator_method = "decision_function" + elif hasattr(base_estimator, "predict_proba"): + base_estimator_method = "predict_proba" + else: + raise RuntimeError("'base_estimator' as no " + "'decision_function' or 'predict_proba'" + " method.") + df = cross_val_predict(base_estimator, X, y, cv=cv, + method=base_estimator_method) + if base_estimator_method == "decision_function": + if df.ndim == 1: + df = df[:, np.newaxis] + else: + if len(self.label_encoder_.classes_) == 2: + df = df[:, 1:] - for train, test in cv.split(X, y): this_estimator = clone(base_estimator) - if sample_weight is not None and base_estimator_supports_sw: - this_estimator.fit(X[train], y[train], - sample_weight=sample_weight[train]) + this_estimator.fit(X, y, sample_weight=sample_weight) else: - this_estimator.fit(X[train], y[train]) - - sw = None if sample_weight is None else sample_weight[test] + this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, self.label_encoder_, self.method, - X[test], y[test], sw + this_estimator, self.label_encoder_, self.method, df, y, + sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) - return self def predict_proba(self, X): @@ -327,9 +357,10 @@ def _more_tags(self): def _get_predictions(clf_fitted, X, label_encoder_): - """Returns predictions for `X` and index of classes present in `X`. + """Returns predictions for `X` and the index of classes present in + `clf_fitted`. - For predicitons, `decision_function` method of the `clf_fitted` is used. + For predictions, `decision_function` method of the `clf_fitted` is used. If this does not exist, `predict_proba` method used. 
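The shape conventions in this helper are the subtle part: a one-dimensional `decision_function` output becomes a single column, and for binary `predict_proba` only the positive-class column is kept, so callers can always iterate over columns. A freestanding sketch of that dispatch (the `get_predictions` name and the explicit `n_classes` argument are illustrative simplifications; the patch passes a fitted `LabelEncoder` instead):

import numpy as np

def get_predictions(clf_fitted, X, n_classes):
    """Return per-class prediction columns, mirroring the helper above."""
    if hasattr(clf_fitted, "decision_function"):
        preds = clf_fitted.decision_function(X)
        if preds.ndim == 1:
            preds = preds[:, np.newaxis]  # binary: one score column
    elif hasattr(clf_fitted, "predict_proba"):
        preds = clf_fitted.predict_proba(X)
        if n_classes == 2:
            preds = preds[:, 1:]          # keep only the positive-class column
    else:
        raise RuntimeError("classifier has no decision_function or "
                           "predict_proba method.")
    return preds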
Parameters @@ -369,7 +400,7 @@ def _get_predictions(clf_fitted, X, label_encoder_): return df, pos_class_indices -def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, +def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, sample_weight=None): """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` instance. @@ -388,12 +419,17 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, method : {'sigmoid', 'isotonic'} The method to use for calibration. - X : array-like - Sample data used to calibrate predictions. - y : ndarray, shape (n_samples,) The targets. + X : array-like, shape (n_samples, n_features), default=None + Sample data used to calibrate predictions. If None, use df instead. + + df : array-like, shape (n_samples, n_classes), default=None + Predictions, output from `base_estimator`, used to calibrate + predictions. If None, use X instead. + If binary (i.e., `label_encoder_.classes_` = 2), shape (n_samples, 1) + sample_weight : ndarray, shape (n_samples,), default=None Sample weights. If `None`, then samples are equally weighted. @@ -402,7 +438,10 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, X, y, pipeline : _CalibratedClassiferPipeline instance """ Y = label_binarize(y, classes=label_encoder_.classes_) - df, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) + if X: + df, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) + elif df: + pos_class_indices = label_encoder_.transform(clf_fitted.classes_) calibrated_classifiers = [] for class_idx, this_df in zip(pos_class_indices, df.T): From dadefa7a01158f37ba0287522eabfc909a3e6728 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 7 Jul 2020 17:54:22 +0200 Subject: [PATCH 10/44] first iter, tests pass --- sklearn/calibration.py | 65 +++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index b93506ec4e7be..acf616fae708f 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -89,6 +89,24 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. + ensemble : bool, default=True + Determines how the calibrator is fit, if `cv` is not `'prefit'`. + + If `True`, the `base_estimator` is fit and calibrated on each + `cv` fold. The final estimator is an ensemble that outputs the + average predicted probabilities of all fitted classifier and calibrator + pairs. + + If `False`, `cv` is used to compute unbiased predictions, which + are concatenated and used to train the calibrator (sigmoid or isotonic + model). The `base_estimator` trained on all the data is used at + prediction time. + Note this method is implemented when `probabilities=True` for + :mod:`sklearn.svm` estimators. + + .. 
versionadded:: 0.24 + + Attributes ---------- classes_ : array, shape (n_classes) @@ -163,7 +181,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, """ @_deprecate_positional_args def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, - ensemble=False): + ensemble=True): self.base_estimator = base_estimator self.method = method self.cv = cv @@ -212,8 +230,8 @@ def fit(self, X, y, sample_weight=None): self.label_encoder_ = LabelEncoder().fit(self.classes_) calibrated_classifier = _fit_calibrator( - base_estimator, self.label_encoder_, self.method, X, y, - sample_weight + base_estimator, self.label_encoder_, self.method, y=y, X=X, + sample_weight=sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) else: @@ -226,21 +244,6 @@ def fit(self, X, y, sample_weight=None): self.classes_ = le.classes_ self.label_encoder_ = le - # Check that each cross-validation fold can have at least one - # example per class - if isinstance(self.cv, int): - n_folds = self.cv - elif hasattr(self.cv, "n_splits"): - n_folds = self.cv.n_splits - else: - n_folds = None - if n_folds and np.any([np.sum(y == class_) < n_folds - for class_ in self.classes_]): - raise ValueError(f"Requesting {n_folds}-fold cross-validation " - f"but provided less than {n_folds} examples " - "for at least one class.") - - cv = check_cv(self.cv, y, classifier=True) fit_parameters = signature(base_estimator.fit).parameters base_estimator_supports_sw = "sample_weight" in fit_parameters @@ -253,6 +256,22 @@ def fit(self, X, y, sample_weight=None): "sample weights will only be used for the " "calibration itself." % estimator_name) if self.ensemble: + # Check that each cross-validation fold can have at least one + # example per class + if isinstance(self.cv, int): + n_folds = self.cv + elif hasattr(self.cv, "n_splits"): + n_folds = self.cv.n_splits + else: + n_folds = None + if n_folds and np.any([np.sum(y == class_) < n_folds + for class_ in self.classes_]): + raise ValueError(f"Requesting {n_folds}-fold " + "cross-validation but provided less than " + f"{n_folds} examples for at least one " + "class.") + cv = check_cv(self.cv, y, classifier=True) + for train, test in cv.split(X, y): this_estimator = clone(base_estimator) @@ -266,7 +285,7 @@ def fit(self, X, y, sample_weight=None): sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( this_estimator, self.label_encoder_, self.method, - X[test], y[test], sw + y=y[test], X=X[test], sample_weight=sw ) self.calibrated_classifiers_.append(calibrated_classifier) else: @@ -293,8 +312,8 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, self.label_encoder_, self.method, df, y, - sample_weight + this_estimator, self.label_encoder_, self.method, y=y, + df=df, sample_weight=sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) return self @@ -438,9 +457,9 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, pipeline : _CalibratedClassiferPipeline instance """ Y = label_binarize(y, classes=label_encoder_.classes_) - if X: + if X is not None: df, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) - elif df: + elif df is not None: pos_class_indices = label_encoder_.transform(clf_fitted.classes_) calibrated_classifiers = [] From 28fc1c00b079dd5c608f1387c82b83c02cf0f2b5 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 7 Jul 2020 18:02:01 +0200 Subject: [PATCH 11/44] 
lint --- sklearn/calibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index acf616fae708f..5e599c81790af 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -185,7 +185,7 @@ def __init__(self, base_estimator=None, *, method='sigmoid', cv=None, self.base_estimator = base_estimator self.method = method self.cv = cv - self.ensemble=ensemble + self.ensemble = ensemble def fit(self, X, y, sample_weight=None): """Fit the calibrated model @@ -265,7 +265,7 @@ def fit(self, X, y, sample_weight=None): else: n_folds = None if n_folds and np.any([np.sum(y == class_) < n_folds - for class_ in self.classes_]): + for class_ in self.classes_]): raise ValueError(f"Requesting {n_folds}-fold " "cross-validation but provided less than " f"{n_folds} examples for at least one " From 443ae501d9f0a04b1ee01d8044f11d2d8c610052 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 8 Jul 2020 17:19:38 +0200 Subject: [PATCH 12/44] rename df to preds --- sklearn/calibration.py | 91 ++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 5e599c81790af..653cad1290325 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -91,6 +91,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, ensemble : bool, default=True Determines how the calibrator is fit, if `cv` is not `'prefit'`. + Ignored if `cv='prefit'`. If `True`, the `base_estimator` is fit and calibrated on each `cv` fold. The final estimator is an ensemble that outputs the @@ -106,7 +107,6 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, .. versionadded:: 0.24 - Attributes ---------- classes_ : array, shape (n_classes) @@ -121,10 +121,6 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, The number of features in `X`. If `cv='prefit'`, number of features in the data used to fit `base_estimator`. - label_encoder_ : LabelEncoder instance - `LabelEncoder` fitted on `y`. If `cv='prefit'`, `LabelEncoder` - fitted on `base_estimator.classes_`. 
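The `ensemble` flag whose docstring is reworked here trades variance for cost: `True` keeps one classifier/calibrator pair per fold and averages their probabilities, while `False` pools out-of-fold predictions into a single calibrator chained to one classifier refitted on all the data. Assuming the final semantics this series is building toward (as released in scikit-learn 0.24), the difference shows up in the fitted attributes:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, random_state=0)
base = RandomForestClassifier(random_state=0)

# ensemble=True: five fold-specific classifier/calibrator pairs, averaged.
averaged = CalibratedClassifierCV(base, cv=5, ensemble=True).fit(X, y)

# ensemble=False: one calibrator trained on pooled out-of-fold predictions,
# chained to a single classifier refitted on all of the data.
pooled = CalibratedClassifierCV(base, cv=5, ensemble=False).fit(X, y)

assert len(averaged.calibrated_classifiers_) == 5
assert len(pooled.calibrated_classifiers_) == 1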
- Examples -------- >>> from sklearn.datasets import make_classification @@ -227,11 +223,11 @@ def fit(self, X, y, sample_weight=None): with suppress(AttributeError): self.n_features_in_ = base_estimator.n_features_in_ self.classes_ = self.base_estimator.classes_ - self.label_encoder_ = LabelEncoder().fit(self.classes_) + label_encoder_ = LabelEncoder().fit(self.classes_) calibrated_classifier = _fit_calibrator( - base_estimator, self.label_encoder_, self.method, y=y, X=X, - sample_weight=sample_weight + base_estimator, label_encoder_, self.method, X, y, + sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) else: @@ -242,7 +238,7 @@ def fit(self, X, y, sample_weight=None): # Set attributes using all `y` le = LabelEncoder().fit(y) self.classes_ = le.classes_ - self.label_encoder_ = le + label_encoder_ = le fit_parameters = signature(base_estimator.fit).parameters base_estimator_supports_sw = "sample_weight" in fit_parameters @@ -297,14 +293,14 @@ def fit(self, X, y, sample_weight=None): raise RuntimeError("'base_estimator' as no " "'decision_function' or 'predict_proba'" " method.") - df = cross_val_predict(base_estimator, X, y, cv=cv, + preds = cross_val_predict(base_estimator, X, y, cv=cv, method=base_estimator_method) if base_estimator_method == "decision_function": - if df.ndim == 1: - df = df[:, np.newaxis] + if preds.ndim == 1: + preds = preds[:, np.newaxis] else: if len(self.label_encoder_.classes_) == 2: - df = df[:, 1:] + preds = preds[:, 1] this_estimator = clone(base_estimator) if sample_weight is not None and base_estimator_supports_sw: @@ -312,8 +308,8 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, self.label_encoder_, self.method, y=y, - df=df, sample_weight=sample_weight + this_estimator, label_encoder_, self.method, + X[test], y[test], sw ) self.calibrated_classifiers_.append(calibrated_classifier) return self @@ -395,7 +391,7 @@ def _get_predictions(clf_fitted, X, label_encoder_): Returns ------- - df : array-like, shape (X.shape[0], len(clf_fitted.classes_)) + preds : array-like, shape (X.shape[0], len(clf_fitted.classes_)) The predictions. Note array is of shape (X.shape[0], 1) when there are 2 classes. @@ -403,29 +399,34 @@ def _get_predictions(clf_fitted, X, label_encoder_): Indices of the classes present in `X`. """ if hasattr(clf_fitted, "decision_function"): - df = clf_fitted.decision_function(X) - if df.ndim == 1: - df = df[:, np.newaxis] + preds = clf_fitted.decision_function(X) + if preds.ndim == 1: + preds = preds[:, np.newaxis] elif hasattr(clf_fitted, "predict_proba"): - df = clf_fitted.predict_proba(X) + preds = clf_fitted.predict_proba(X) if len(label_encoder_.classes_) == 2: - df = df[:, 1:] + preds = preds[:, 1] else: raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") pos_class_indices = label_encoder_.transform(clf_fitted.classes_) - return df, pos_class_indices + return preds, pos_class_indices -def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, +def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, preds=None, sample_weight=None): """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` instance. - Output of the `decision_function` method of the `clf_fitted` is used for - calibration. If this method does not exist, `predict_proba` method is used. + If `X` is not None, it is used to obtain predictions, used for calibration. 
+ The `decision_function` method of `clf_fitted` is used if present. If not, + `predict_proba` method is used. If `preds` is not None, it is used for + calibration. Only one of `X` or `preds` should be not None. + + `n_classes` calibrators are fitted. However, if `n_classes` equals 2, + one calibrator is fit. Parameters ---------- @@ -442,15 +443,15 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, The targets. X : array-like, shape (n_samples, n_features), default=None - Sample data used to calibrate predictions. If None, use df instead. + Sample data used to calibrate predictions. If None, use `preds` instead. - df : array-like, shape (n_samples, n_classes), default=None - Predictions, output from `base_estimator`, used to calibrate - predictions. If None, use X instead. + preds : array-like, shape (n_samples, n_classes), default=None + The predictions, output from `base_estimator`, used to calibrate + predictions. If None, use `X` instead. If binary (i.e., `label_encoder_.classes_` = 2), shape (n_samples, 1) sample_weight : ndarray, shape (n_samples,), default=None - Sample weights. If `None`, then samples are equally weighted. + Sample weights. If None, then samples are equally weighted. Returns ------- @@ -458,12 +459,14 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, """ Y = label_binarize(y, classes=label_encoder_.classes_) if X is not None: - df, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) - elif df is not None: + preds, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) + elif preds is not None: pos_class_indices = label_encoder_.transform(clf_fitted.classes_) + else: + raise ValueError("One of `X` or `preds` should be not None.") calibrated_classifiers = [] - for class_idx, this_df in zip(pos_class_indices, df.T): + for class_idx, this_pred in zip(pos_class_indices, preds.T): if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') elif method == 'sigmoid': @@ -471,7 +474,7 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, df=None, else: raise ValueError("'method' should be one of: 'sigmoid' or " f"'isotonic'. Got {method}.") - calibrator.fit(this_df, Y[:, class_idx], sample_weight) + calibrator.fit(this_pred, Y[:, class_idx], sample_weight) calibrated_classifiers.append(calibrator) pipeline = _CalibratedClassiferPipeline( @@ -516,18 +519,18 @@ def predict_proba(self, X): The predicted probabilities. Can be exact zeros. 
""" n_classes = len(self.label_encoder_.classes_) - df, pos_class_indices = _get_predictions( + preds, pos_class_indices = _get_predictions( self.clf_fitted, X, self.label_encoder_ ) proba = np.zeros((X.shape[0], n_classes)) - for class_idx, this_df, calibrator in \ - zip(pos_class_indices, df.T, self.calibrators_fitted): + for class_idx, this_pred, calibrator in \ + zip(pos_class_indices, preds.T, self.calibrators_fitted): if n_classes == 2: - # When binary, proba of clf_fitted.classes_[1] - # output but `pos_class_indices` = 0 + # When binary, `preds` consists only of predictions for + # clf_fitted.classes_[1] but `pos_class_indices` = 0 class_idx += 1 - proba[:, class_idx] = calibrator.predict(this_df) + proba[:, class_idx] = calibrator.predict(this_pred) # Normalize the probabilities if n_classes == 2: @@ -544,12 +547,12 @@ def predict_proba(self, X): return proba -def _sigmoid_calibration(df, y, sample_weight=None): +def _sigmoid_calibration(pred, y, sample_weight=None): """Probability Calibration with sigmoid method (Platt 2000) Parameters ---------- - df : ndarray, shape (n_samples,) + pred : ndarray, shape (n_samples,) The decision function or predict proba for the samples. y : ndarray, shape (n_samples,) @@ -570,10 +573,10 @@ def _sigmoid_calibration(df, y, sample_weight=None): ---------- Platt, "Probabilistic Outputs for Support Vector Machines" """ - df = column_or_1d(df) + pred = column_or_1d(pred) y = column_or_1d(y) - F = df # F follows Platt's notations + F = pred # F follows Platt's notations # Bayesian priors (see Platt end of section 2.2) prior0 = float(np.sum(y <= 0)) From 093105d21f2322e4a564a93a53f0506e46ca5445 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 8 Jul 2020 17:23:49 +0200 Subject: [PATCH 13/44] lint --- sklearn/calibration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 653cad1290325..779617a96916c 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -443,7 +443,8 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, preds=None, The targets. X : array-like, shape (n_samples, n_features), default=None - Sample data used to calibrate predictions. If None, use `preds` instead. + Sample data used to calibrate predictions. If None, use `preds` + instead. 
preds : array-like, shape (n_samples, n_classes), default=None The predictions, output from `base_estimator`, used to calibrate @@ -459,7 +460,8 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, preds=None, """ Y = label_binarize(y, classes=label_encoder_.classes_) if X is not None: - preds, pos_class_indices = _get_predictions(clf_fitted, X, label_encoder_) + preds, pos_class_indices = _get_predictions(clf_fitted, X, + label_encoder_) elif preds is not None: pos_class_indices = label_encoder_.transform(clf_fitted.classes_) else: From 7dcd4e512b666e0963eeeb83f5d83d8deb28085d Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 9 Jul 2020 13:42:19 +0200 Subject: [PATCH 14/44] lint --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 779617a96916c..88fa1cb882074 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -294,7 +294,7 @@ def fit(self, X, y, sample_weight=None): "'decision_function' or 'predict_proba'" " method.") preds = cross_val_predict(base_estimator, X, y, cv=cv, - method=base_estimator_method) + method=base_estimator_method) if base_estimator_method == "decision_function": if preds.ndim == 1: preds = preds[:, np.newaxis] From 9e1ea370800e9ee776e02309947815cb65fff616 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 9 Jul 2020 19:44:17 +0200 Subject: [PATCH 15/44] sep pred and fit calib --- sklearn/calibration.py | 192 ++++++++++++++++++++++++----------------- 1 file changed, 113 insertions(+), 79 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 88fa1cb882074..0e063e87f49ae 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -225,8 +225,12 @@ def fit(self, X, y, sample_weight=None): self.classes_ = self.base_estimator.classes_ label_encoder_ = LabelEncoder().fit(self.classes_) + preds = _get_predictions( + base_estimator, X, label_encoder_ + ) + calibrated_classifier = _fit_calibrator( - base_estimator, label_encoder_, self.method, X, y, + base_estimator, preds, y, label_encoder_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) @@ -236,9 +240,8 @@ def fit(self, X, y, sample_weight=None): force_all_finite=False, allow_nd=True ) # Set attributes using all `y` - le = LabelEncoder().fit(y) - self.classes_ = le.classes_ - label_encoder_ = le + label_encoder_ = LabelEncoder().fit(y) + self.classes_ = label_encoder_.classes_ fit_parameters = signature(base_estimator.fit).parameters base_estimator_supports_sw = "sample_weight" in fit_parameters @@ -278,38 +281,34 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X[train], y[train]) + preds = _get_predictions( + this_estimator, X[test], label_encoder_ + ) + sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( - this_estimator, self.label_encoder_, self.method, - y=y[test], X=X[test], sample_weight=sw + this_estimator, preds, y[test], label_encoder_, + self.method, sample_weight=sw ) self.calibrated_classifiers_.append(calibrated_classifier) else: - if hasattr(base_estimator, "decision_function"): - base_estimator_method = "decision_function" - elif hasattr(base_estimator, "predict_proba"): - base_estimator_method = "predict_proba" - else: - raise RuntimeError("'base_estimator' as no " - "'decision_function' or 'predict_proba'" - " method.") + pred_method = get_prediction_method( + base_estimator, return_string=True + ) preds = cross_val_predict(base_estimator, 
X, y, cv=cv, - method=base_estimator_method) - if base_estimator_method == "decision_function": - if preds.ndim == 1: - preds = preds[:, np.newaxis] - else: - if len(self.label_encoder_.classes_) == 2: - preds = preds[:, 1] + method=pred_method) + preds = _reshape_preds( + preds, pred_method, len(label_encoder_.classes_) + ) this_estimator = clone(base_estimator) if sample_weight is not None and base_estimator_supports_sw: - this_estimator.fit(X, y, sample_weight=sample_weight) + this_estimator.fit(X, y, sample_weight) else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, label_encoder_, self.method, - X[test], y[test], sw + this_estimator, preds, y, label_encoder_,self.method, + sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) return self @@ -371,85 +370,126 @@ def _more_tags(self): } -def _get_predictions(clf_fitted, X, label_encoder_): - """Returns predictions for `X` and the index of classes present in - `clf_fitted`. +def get_prediction_method(clf_fitted, return_string=False): + """Return prediction method or their corresponding name as string. - For predictions, `decision_function` method of the `clf_fitted` is used. - If this does not exist, `predict_proba` method used. + `decision_function` method of `clf_fitted` returned, if it + exists, otherwise `predict_proba` method returned. Parameters ---------- clf_fitted : Estimator instance - Fitted classifier instance. - - X : array-like - Sample data used for the predictions. + Classifier to obtain the prediction method from. - label_encoder_ : LabelEncoder instance - LabelEncoder instance fitted on all the targets. + return_string : bool, default=False + Whether to return the method name as string instead of the prediction + method. Returns ------- - preds : array-like, shape (X.shape[0], len(clf_fitted.classes_)) - The predictions. Note array is of shape (X.shape[0], 1) when there are - 2 classes. - - pos_class_indices : array-like, shape (n_classes,) - Indices of the classes present in `X`. + prediction_method : callable or str + If `return_string=True`, name of the prediction method as string. + If `return_string=False`, the prediction method. """ if hasattr(clf_fitted, "decision_function"): - preds = clf_fitted.decision_function(X) - if preds.ndim == 1: - preds = preds[:, np.newaxis] + method_name = "decision_function" + method = getattr(clf_fitted, "decision_function") elif hasattr(clf_fitted, "predict_proba"): - preds = clf_fitted.predict_proba(X) - if len(label_encoder_.classes_) == 2: - preds = preds[:, 1] + method_name = "predict_proba" + method = getattr(clf_fitted, "predict_proba") else: raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") + if return_string: + return method_name + else: + return method - pos_class_indices = label_encoder_.transform(clf_fitted.classes_) - return preds, pos_class_indices +def _reshape_preds(preds, method, n_classes): + """Reshape predictions when classification binary. + Parameters + ---------- + preds : array-like + Predictions. -def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, preds=None, - sample_weight=None): - """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` - instance. + method : {'decision_function', 'predict_proba'} + Method used to obtain the predictions. + + n_classes : int + Number of classes. 
+ + Returns + ------- + preds : array-like, shape (n_samples, 1) + Reshaped predictions + """ + if method == 'decision_function': + if preds.ndim == 1: + preds = preds[:, np.newaxis] + elif method == 'predict_proba': + if n_classes == 2: + preds = preds[:, 1:] + else: + raise RuntimeError("'method' needs to be one of 'decision_function' " + "or 'predict_proba'.") + return preds - If `X` is not None, it is used to obtain predictions, used for calibration. - The `decision_function` method of `clf_fitted` is used if present. If not, - `predict_proba` method is used. If `preds` is not None, it is used for - calibration. Only one of `X` or `preds` should be not None. - `n_classes` calibrators are fitted. However, if `n_classes` equals 2, - one calibrator is fit. +def _get_predictions(clf_fitted, X, label_encoder_): + """Returns predictions for `X` and the index of classes present. Parameters ---------- clf_fitted : Estimator instance Fitted classifier. + X : array-like + Data used to obtain predictions. + label_encoder_ : LabelEncoder instance LabelEncoder instance fitted on all the targets. - method : {'sigmoid', 'isotonic'} - The method to use for calibration. + Returns + ------- + preds : array-like, shape (X.shape[0], len(clf_fitted.classes_)) + The predictions. Note if there are 2 classes, array is of shape + (X.shape[0], 1). + """ + pred_method = get_prediction_method(clf_fitted) + preds = pred_method(X) + n_classes = len(clf_fitted.classes_) + preds = _reshape_preds(preds, pred_method.__name__, n_classes) + + return preds + + +def _fit_calibrator(clf_fitted, preds, y, label_encoder_, method, + sample_weight=None): + """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` + instance. + + `n_classes` (i.e. `len(clf_fitted.classes_)`) calibrators are fitted. + However, if `n_classes` equals 2, one calibrator is fit. + + Parameters + ---------- + clf_fitted : Estimator instance + Fitted classifier. + + preds : array-like, shape (n_samples, n_classes) + Predictions for calibrating the predictions. + If binary, shape (n_samples, 1). y : ndarray, shape (n_samples,) The targets. - X : array-like, shape (n_samples, n_features), default=None - Sample data used to calibrate predictions. If None, use `preds` - instead. + label_encoder_ : LabelEncoder instance + LabelEncoder instance fitted on all the targets. - preds : array-like, shape (n_samples, n_classes), default=None - The predictions, output from `base_estimator`, used to calibrate - predictions. If None, use `X` instead. - If binary (i.e., `label_encoder_.classes_` = 2), shape (n_samples, 1) + method : {'sigmoid', 'isotonic'} + The method to use for calibration. sample_weight : ndarray, shape (n_samples,), default=None Sample weights. If None, then samples are equally weighted. 
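
[Editor's note, not part of the patch: the one-vs-rest scheme that the
`_fit_calibrator` docstring above describes can be illustrated with a short
sketch. This is a minimal example under assumed names (`clf`, `calibrators`);
it only mirrors the idea of fitting one calibrator per class column of the
classifier's output, not the patch's exact API.]

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.isotonic import IsotonicRegression
    from sklearn.preprocessing import label_binarize
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=200, n_features=6, n_informative=4,
                               n_classes=3, random_state=0)
    clf = LinearSVC(random_state=0).fit(X, y)
    preds = clf.decision_function(X)        # shape (n_samples, 3)
    Y = label_binarize(y, classes=clf.classes_)

    # One calibrator per class column (one-vs-rest). With only 2 classes a
    # single calibrator would be fitted, on the clf.classes_[1] column.
    calibrators = [
        IsotonicRegression(out_of_bounds='clip').fit(preds[:, k], Y[:, k])
        for k in range(preds.shape[1])
    ]
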
@@ -459,14 +499,7 @@ def _fit_calibrator(clf_fitted, label_encoder_, method, y, X=None, preds=None, pipeline : _CalibratedClassiferPipeline instance """ Y = label_binarize(y, classes=label_encoder_.classes_) - if X is not None: - preds, pos_class_indices = _get_predictions(clf_fitted, X, - label_encoder_) - elif preds is not None: - pos_class_indices = label_encoder_.transform(clf_fitted.classes_) - else: - raise ValueError("One of `X` or `preds` should be not None.") - + pos_class_indices = label_encoder_.transform(clf_fitted.classes_) calibrated_classifiers = [] for class_idx, this_pred in zip(pos_class_indices, preds.T): if method == 'isotonic': @@ -520,11 +553,12 @@ def predict_proba(self, X): proba : array, shape (n_samples, n_classes) The predicted probabilities. Can be exact zeros. """ - n_classes = len(self.label_encoder_.classes_) - preds, pos_class_indices = _get_predictions( - self.clf_fitted, X, self.label_encoder_ + preds = _get_predictions(self.clf_fitted, X, self.label_encoder_) + pos_class_indices = self.label_encoder_.transform( + self.clf_fitted.classes_ ) + n_classes = len(self.label_encoder_.classes_) proba = np.zeros((X.shape[0], n_classes)) for class_idx, this_pred, calibrator in \ zip(pos_class_indices, preds.T, self.calibrators_fitted): From 42d405bd91fec1ab90eb7c4d76178fbd5fbf4d0a Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 9 Jul 2020 19:57:24 +0200 Subject: [PATCH 16/44] lint --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0e063e87f49ae..89fe3749a047d 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -307,7 +307,7 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, preds, y, label_encoder_,self.method, + this_estimator, preds, y, label_encoder_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) From b628835f42043b65d4f02c8818017e02bb9e6ce6 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 9 Jul 2020 22:37:47 +0200 Subject: [PATCH 17/44] use partial --- sklearn/calibration.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 89fe3749a047d..0e3715139b506 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -10,6 +10,7 @@ import warnings from inspect import signature from contextlib import suppress +from functools import partial from math import log import numpy as np @@ -225,9 +226,8 @@ def fit(self, X, y, sample_weight=None): self.classes_ = self.base_estimator.classes_ label_encoder_ = LabelEncoder().fit(self.classes_) - preds = _get_predictions( - base_estimator, X, label_encoder_ - ) + pred_method = get_prediction_method(base_estimator) + preds = _get_predictions(pred_method, X, label_encoder_) calibrated_classifier = _fit_calibrator( base_estimator, preds, y, label_encoder_, self.method, @@ -281,8 +281,9 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X[train], y[train]) + pred_method = get_prediction_method(this_estimator) preds = _get_predictions( - this_estimator, X[test], label_encoder_ + pred_method, X[test], label_encoder_ ) sw = None if sample_weight is None else sample_weight[test] @@ -292,13 +293,12 @@ def fit(self, X, y, sample_weight=None): ) self.calibrated_classifiers_.append(calibrated_classifier) else: - pred_method = get_prediction_method( - base_estimator, 
return_string=True - ) - preds = cross_val_predict(base_estimator, X, y, cv=cv, - method=pred_method) - preds = _reshape_preds( - preds, pred_method, len(label_encoder_.classes_) + pred_method = partial(cross_val_predict( + base_estimator, X, y, cv=cv, + method=get_prediction_method(base_estimator, + return_string=True))) + preds = _get_predictions( + pred_method, X, label_encoder_ ) this_estimator = clone(base_estimator) @@ -437,13 +437,13 @@ def _reshape_preds(preds, method, n_classes): return preds -def _get_predictions(clf_fitted, X, label_encoder_): +def _get_predictions(pred_method, X, label_encoder_): """Returns predictions for `X` and the index of classes present. Parameters ---------- - clf_fitted : Estimator instance - Fitted classifier. + pred_method : callable + Prediction method. X : array-like Data used to obtain predictions. @@ -457,11 +457,9 @@ def _get_predictions(clf_fitted, X, label_encoder_): The predictions. Note if there are 2 classes, array is of shape (X.shape[0], 1). """ - pred_method = get_prediction_method(clf_fitted) preds = pred_method(X) - n_classes = len(clf_fitted.classes_) + n_classes = len(label_encoder_.classes_) preds = _reshape_preds(preds, pred_method.__name__, n_classes) - return preds @@ -553,7 +551,8 @@ def predict_proba(self, X): proba : array, shape (n_samples, n_classes) The predicted probabilities. Can be exact zeros. """ - preds = _get_predictions(self.clf_fitted, X, self.label_encoder_) + pred_method = get_prediction_method(self.clf_fitted) + preds = _get_predictions(pred_method, X, self.label_encoder_) pos_class_indices = self.label_encoder_.transform( self.clf_fitted.classes_ ) From 6d30cf8f69cb9e9f58503c9c96924696bfdecd88 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 28 Jul 2020 17:16:06 +0200 Subject: [PATCH 18/44] suggestions, update docstring --- sklearn/calibration.py | 240 ++++++++++++++---------------- sklearn/tests/test_calibration.py | 2 +- 2 files changed, 110 insertions(+), 132 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0e3715139b506..f66e5d4bcba21 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -38,15 +38,21 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin, """Probability calibration with isotonic regression or logistic regression. This class uses cross-validation to both estimate the parameters of a - classifier and subsequently calibrate a classifier. For each cv split it - fits a copy of the base estimator to the training folds, and calibrates it - using the testing fold. For prediction, predicted probabilities are - averaged across these individual calibrated classifiers. - - Already fitted classifiers can be calibrated via the parameter cv="prefit". - In this case, no cross-validation is used and all provided data is used - for calibration. The user has to take care manually that data for model - fitting and calibration are disjoint. + classifier and subsequently calibrate a classifier. With default + `ensemble=True`, for each cv split it + fits a copy of the base estimator to the training subset, and calibrates it + using the testing subset. For prediction, predicted probabilities are + averaged across these individual calibrated classifiers. When + `ensemble=False`, cross-validation is used to obtain unbiased predictions, + from the testing subset. These are concatenated together and used for + calibration. For prediction, the base estimator, trained using all the + data, is used. 
This is the method implemented when `probabilities=True` for
+    :mod:`sklearn.svm` estimators.
+
+    Already fitted classifiers can be calibrated via the parameter
+    `cv="prefit"`. In this case, no cross-validation is used and all provided
+    data is used for calibration. The user has to take care manually that data
+    for model fitting and calibration are disjoint.
 
     The calibration is based on the :term:`decision_function` method of the
     `base_estimator` if it exists, else on :term:`predict_proba`.
@@ -67,7 +73,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin,
         ``(<<1000)`` since it tends to overfit.
 
     cv : integer, cross-validation generator, iterable or "prefit", \
-            default=None
+        default=None
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
 
@@ -91,18 +97,19 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin,
         ``cv`` default value if None changed from 3-fold to 5-fold.
 
     ensemble : bool, default=True
-        Determines how the calibrator is fit, if `cv` is not `'prefit'`.
+        Determines how the calibrator is fitted when `cv` is not `'prefit'`.
+        Ignored if `cv='prefit'`.
 
-        If `True`, the `base_estimator` is fit and calibrated on each
-        `cv` fold. The final estimator is an ensemble that outputs the
-        average predicted probabilities of all fitted classifier and calibrator
-        pairs.
+        If `True`, the `base_estimator` is fitted using training data and
+        calibrated using testing data, for each `cv` fold. The final estimator
+        is an ensemble of `n_cv` fitted classifier and calibrator pairs, where
+        `n_cv` is the number of cross-validation folds. The output is the
+        average predicted probabilities of all pairs.
 
-        If `False`, `cv` is used to compute unbiased predictions, which
-        are concatenated and used to train the calibrator (sigmoid or isotonic
-        model). The `base_estimator` trained on all the data is used at
-        prediction time.
+        If `False`, `cv` is used to compute unbiased predictions (with the
+        test fold), which are concatenated and used to train the calibrator
+        (sigmoid or isotonic model). At prediction time, the classifier used
+        is the `base_estimator` trained on all the data.
         Note this method is implemented when `probabilities=True` for
         :mod:`sklearn.svm` estimators.
 
         .. versionadded:: 0.24
 
     Attributes
     ----------
     classes_ : array, shape (n_classes)
         The class labels.
 
-    calibrated_classifiers_ : list (len() equal to cv or 1 if cv == "prefit")
-        The list of calibrated classifiers, one for each cross-validation
-        split, which has been fitted on training folds and
-        calibrated on the testing fold.
+    calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \
+            or `ensemble=False`)
+        The list of classifier and calibrator pairs.
+
+        - When `cv="prefit"`, the fitted `base_estimator` and fitted
+          calibrator.
+        - When `cv` is not "prefit" and `ensemble=True`, `n_cv` fitted
+          `base_estimator` and calibrator pairs. `n_cv` is the number of
+          cross-validation folds.
+        - When `cv` is not "prefit" and `ensemble=False`, the `base_estimator`,
+          fitted on all the data, and fitted calibrator.
 
     n_features_in_ : int
         The number of features in `X`. If `cv='prefit'`, number of features
@@ -185,7 +199,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin,
     def fit(self, X, y, sample_weight=None):
-        """Fit the calibrated model
+        """Fit the calibrated model.
Parameters ---------- @@ -224,13 +238,13 @@ def fit(self, X, y, sample_weight=None): with suppress(AttributeError): self.n_features_in_ = base_estimator.n_features_in_ self.classes_ = self.base_estimator.classes_ - label_encoder_ = LabelEncoder().fit(self.classes_) - pred_method = get_prediction_method(base_estimator) - preds = _get_predictions(pred_method, X, label_encoder_) + pred_method = _get_prediction_method(base_estimator) + n_classes = len(self.classes_) + preds = _get_predictions(pred_method, X, n_classes) calibrated_classifier = _fit_calibrator( - base_estimator, preds, y, label_encoder_, self.method, + base_estimator, preds, y, self.classes_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) @@ -242,6 +256,7 @@ def fit(self, X, y, sample_weight=None): # Set attributes using all `y` label_encoder_ = LabelEncoder().fit(y) self.classes_ = label_encoder_.classes_ + n_classes = len(self.classes_) fit_parameters = signature(base_estimator.fit).parameters base_estimator_supports_sw = "sample_weight" in fit_parameters @@ -281,40 +296,35 @@ def fit(self, X, y, sample_weight=None): else: this_estimator.fit(X[train], y[train]) - pred_method = get_prediction_method(this_estimator) - preds = _get_predictions( - pred_method, X[test], label_encoder_ - ) + pred_method = _get_prediction_method(this_estimator) + preds = _get_predictions(pred_method, X[test], n_classes) sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( - this_estimator, preds, y[test], label_encoder_, + this_estimator, preds, y[test], self.classes_, self.method, sample_weight=sw ) self.calibrated_classifiers_.append(calibrated_classifier) else: + this_estimator = clone(base_estimator) + method = _get_prediction_method(this_estimator) pred_method = partial(cross_val_predict( - base_estimator, X, y, cv=cv, - method=get_prediction_method(base_estimator, - return_string=True))) - preds = _get_predictions( - pred_method, X, label_encoder_ - ) + this_estimator, X, y, cv=cv, method=method.__name__)) + preds = _get_predictions(pred_method, X, n_classes) - this_estimator = clone(base_estimator) if sample_weight is not None and base_estimator_supports_sw: this_estimator.fit(X, y, sample_weight) else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, preds, y, label_encoder_, self.method, + this_estimator, preds, y, self.classes_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) return self def predict_proba(self, X): - """Calibrated probabilities of classification + """Calibrated probabilities of classification. This function returns calibrated probabilities of classification according to each class on an array of test vectors X. @@ -370,61 +380,55 @@ def _more_tags(self): } -def get_prediction_method(clf_fitted, return_string=False): - """Return prediction method or their corresponding name as string. +def _get_prediction_method(clf): + """Return prediction method. - `decision_function` method of `clf_fitted` returned, if it + `decision_function` method of `clf` returned, if it exists, otherwise `predict_proba` method returned. Parameters ---------- - clf_fitted : Estimator instance - Classifier to obtain the prediction method from. - - return_string : bool, default=False - Whether to return the method name as string instead of the prediction - method. + clf : Estimator instance + Fitted classifier to obtain the prediction method from. 
Returns ------- - prediction_method : callable or str - If `return_string=True`, name of the prediction method as string. - If `return_string=False`, the prediction method. + prediction_method : callable + The prediction method. """ - if hasattr(clf_fitted, "decision_function"): - method_name = "decision_function" - method = getattr(clf_fitted, "decision_function") - elif hasattr(clf_fitted, "predict_proba"): - method_name = "predict_proba" - method = getattr(clf_fitted, "predict_proba") + if hasattr(clf, "decision_function"): + method = getattr(clf, "decision_function") + elif hasattr(clf, "predict_proba"): + method = getattr(clf, "predict_proba") else: raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") - if return_string: - return method_name - else: - return method + return method -def _reshape_preds(preds, method, n_classes): - """Reshape predictions when classification binary. +def _get_predictions(pred_method, X, n_classes): + """Returns predictions for `X`. Parameters ---------- - preds : array-like - Predictions. + pred_method : callable + Prediction method. - method : {'decision_function', 'predict_proba'} - Method used to obtain the predictions. + X : array-like + Data used to obtain predictions. n_classes : int - Number of classes. + Number of classes present. Returns ------- - preds : array-like, shape (n_samples, 1) - Reshaped predictions + preds : array-like, shape (X.shape[0], len(clf.classes_)) + The predictions. Note if there are 2 classes, array is of shape + (X.shape[0], 1). """ + preds = pred_method(X) + method = pred_method.__name__ + if method == 'decision_function': if preds.ndim == 1: preds = preds[:, np.newaxis] @@ -437,54 +441,27 @@ def _reshape_preds(preds, method, n_classes): return preds -def _get_predictions(pred_method, X, label_encoder_): - """Returns predictions for `X` and the index of classes present. - - Parameters - ---------- - pred_method : callable - Prediction method. - - X : array-like - Data used to obtain predictions. - - label_encoder_ : LabelEncoder instance - LabelEncoder instance fitted on all the targets. - - Returns - ------- - preds : array-like, shape (X.shape[0], len(clf_fitted.classes_)) - The predictions. Note if there are 2 classes, array is of shape - (X.shape[0], 1). - """ - preds = pred_method(X) - n_classes = len(label_encoder_.classes_) - preds = _reshape_preds(preds, pred_method.__name__, n_classes) - return preds - - -def _fit_calibrator(clf_fitted, preds, y, label_encoder_, method, - sample_weight=None): +def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` instance. - `n_classes` (i.e. `len(clf_fitted.classes_)`) calibrators are fitted. - However, if `n_classes` equals 2, one calibrator is fit. + `n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted. + However, if `n_classes` equals 2, one calibrator is fitted. Parameters ---------- - clf_fitted : Estimator instance + clf : Estimator instance Fitted classifier. - preds : array-like, shape (n_samples, n_classes) + preds : array-like, shape (n_samples, n_classes) or (n_samples, 1) when \ + binary. Predictions for calibrating the predictions. - If binary, shape (n_samples, 1). - y : ndarray, shape (n_samples,) + y : array-like, shape (n_samples,) The targets. - label_encoder_ : LabelEncoder instance - LabelEncoder instance fitted on all the targets. + classes : ndarray, shape (n_classes,) + All the prediction classes. 
method : {'sigmoid', 'isotonic'} The method to use for calibration. @@ -496,9 +473,10 @@ def _fit_calibrator(clf_fitted, preds, y, label_encoder_, method, ------- pipeline : _CalibratedClassiferPipeline instance """ - Y = label_binarize(y, classes=label_encoder_.classes_) - pos_class_indices = label_encoder_.transform(clf_fitted.classes_) - calibrated_classifiers = [] + Y = label_binarize(y, classes=classes) + label_encoder = LabelEncoder().fit(classes) + pos_class_indices = label_encoder.transform(clf.classes_) + calibrators = [] for class_idx, this_pred in zip(pos_class_indices, preds.T): if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') @@ -508,11 +486,9 @@ def _fit_calibrator(clf_fitted, preds, y, label_encoder_, method, raise ValueError("'method' should be one of: 'sigmoid' or " f"'isotonic'. Got {method}.") calibrator.fit(this_pred, Y[:, class_idx], sample_weight) - calibrated_classifiers.append(calibrator) + calibrators.append(calibrator) - pipeline = _CalibratedClassiferPipeline( - clf_fitted, calibrated_classifiers, label_encoder_ - ) + pipeline = _CalibratedClassiferPipeline(clf, calibrators, classes) return pipeline @@ -521,19 +497,22 @@ class _CalibratedClassiferPipeline: Parameters ---------- - clf_fitted : Estimator instance + clf : Estimator instance Fitted classifier. - calibrators_fitted : List of fitted estimator instances + calibrators : List of fitted estimator instances List of fitted calibrators (either 'IsotonicRegression' or '_SigmoidCalibration'). The number of calibrators equals the number of classes. However, if there are 2 classes, the list contains only one fitted calibrator. + + classes : ndarray, shape (n_classes,) + All the prediction classes. """ - def __init__(self, clf_fitted, calibrators_fitted, label_encoder_): - self.clf_fitted = clf_fitted - self.calibrators_fitted = calibrators_fitted - self.label_encoder_ = label_encoder_ + def __init__(self, clf, calibrators, classes): + self.clf = clf + self.calibrators = calibrators + self.classes = classes def predict_proba(self, X): """Calculate calibrated probabilities. @@ -543,7 +522,7 @@ def predict_proba(self, X): Parameters ---------- - X : array-like, shape (n_samples, n_features) + X : ndarray, shape (n_samples, n_features) The sample data. Returns @@ -551,19 +530,18 @@ def predict_proba(self, X): proba : array, shape (n_samples, n_classes) The predicted probabilities. Can be exact zeros. 
""" - pred_method = get_prediction_method(self.clf_fitted) - preds = _get_predictions(pred_method, X, self.label_encoder_) - pos_class_indices = self.label_encoder_.transform( - self.clf_fitted.classes_ - ) + pred_method = _get_prediction_method(self.clf) + n_classes = len(self.classes) + preds = _get_predictions(pred_method, X, n_classes) + label_encoder = LabelEncoder().fit(self.classes) + pos_class_indices = label_encoder.transform(self.clf.classes_) - n_classes = len(self.label_encoder_.classes_) proba = np.zeros((X.shape[0], n_classes)) for class_idx, this_pred, calibrator in \ - zip(pos_class_indices, preds.T, self.calibrators_fitted): + zip(pos_class_indices, preds.T, self.calibrators): if n_classes == 2: # When binary, `preds` consists only of predictions for - # clf_fitted.classes_[1] but `pos_class_indices` = 0 + # clf.classes_[1] but `pos_class_indices` = 0 class_idx += 1 proba[:, class_idx] = calibrator.predict(this_pred) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 6181f0dd50027..a08c5a12821e2 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -109,7 +109,7 @@ def test_calibration_default_estimator(): calib_clf = CalibratedClassifierCV(cv=2) calib_clf.fit(X, y) - base_est = calib_clf.calibrated_classifiers_[0].clf_fitted + base_est = calib_clf.calibrated_classifiers_[0].clf assert isinstance(base_est, LinearSVC) From fec2395c0164c5bdd7d99583516969fb74d44212 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Tue, 28 Jul 2020 18:59:06 +0200 Subject: [PATCH 19/44] lint --- sklearn/calibration.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 86a568c76f474..868bbfa2ab14e 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -327,7 +327,6 @@ def fit(self, X, y, sample_weight=None): "class.") cv = check_cv(self.cv, y, classifier=True) - parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=self.pre_dispatch) @@ -429,10 +428,10 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, y : array-like, shape (n_samples,) Targets. - train : ndarray, shape (n_indicies,) + train : ndarray, shape (n_train_indicies,) Indices of the training subset. - test : ndarray, shape (n_indicies,) + test : ndarray, shape (n_test_indicies,) Indices of the testing subset. 
supports_sw : bool From 6701bc7454a028ef9a56d53333ba6a0df957c851 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 29 Jul 2020 16:34:37 +0200 Subject: [PATCH 20/44] add tests --- sklearn/calibration.py | 79 +++++++------- sklearn/tests/test_calibration.py | 164 ++++++++++++++++++------------ 2 files changed, 145 insertions(+), 98 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 868bbfa2ab14e..1d61de8b056bf 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -282,7 +282,9 @@ def fit(self, X, y, sample_weight=None): pred_method = _get_prediction_method(base_estimator) n_classes = len(self.classes_) - preds = _get_predictions(pred_method, X, n_classes) + preds = _get_predictions( + pred_method, pred_method.__name__, X, n_classes + ) calibrated_classifier = _fit_calibrator( base_estimator, preds, y, self.classes_, self.method, @@ -294,39 +296,38 @@ def fit(self, X, y, sample_weight=None): X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False, allow_nd=True ) - # Set attributes using all `y` + # Set `classes_` using all `y` label_encoder_ = LabelEncoder().fit(y) self.classes_ = label_encoder_.classes_ n_classes = len(self.classes_) + # sample_weight checks fit_parameters = signature(base_estimator.fit).parameters supports_sw = "sample_weight" in fit_parameters - if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) - if not supports_sw: estimator_name = type(base_estimator).__name__ warnings.warn("Since %s does not support sample_weights, " "sample weights will only be used for the " "calibration itself." % estimator_name) - if self.ensemble: - # Check that each cross-validation fold can have at least one - # example per class - if isinstance(self.cv, int): - n_folds = self.cv - elif hasattr(self.cv, "n_splits"): - n_folds = self.cv.n_splits - else: - n_folds = None - if n_folds and np.any([np.sum(y == class_) < n_folds - for class_ in self.classes_]): - raise ValueError(f"Requesting {n_folds}-fold " - "cross-validation but provided less than " - f"{n_folds} examples for at least one " - "class.") - cv = check_cv(self.cv, y, classifier=True) + # Check that each cross-validation fold can have at least one + # example per class + if isinstance(self.cv, int): + n_folds = self.cv + elif hasattr(self.cv, "n_splits"): + n_folds = self.cv.n_splits + else: + n_folds = None + if n_folds and np.any([np.sum(y == class_) < n_folds + for class_ in self.classes_]): + raise ValueError(f"Requesting {n_folds}-fold " + "cross-validation but provided less than " + f"{n_folds} examples for at least one class.") + cv = check_cv(self.cv, y, classifier=True) + + if self.ensemble: parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=self.pre_dispatch) @@ -336,13 +337,16 @@ def fit(self, X, y, sample_weight=None): method=self.method, classes=self.classes_, supports_sw=supports_sw, sample_weight=sample_weight) for train, test in cv.split(X, y)) - else: this_estimator = clone(base_estimator) - method = _get_prediction_method(this_estimator) - pred_method = partial(cross_val_predict( - this_estimator, X, y, cv=cv, method=method.__name__)) - preds = _get_predictions(pred_method, X, n_classes) + method_name = _get_prediction_method(this_estimator).__name__ + pred_method = partial( + cross_val_predict, this_estimator, X, y, cv=cv, + method=method_name + ) + preds = _get_predictions( + pred_method, method_name, X, n_classes + ) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) @@ 
-458,7 +462,9 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, n_classes = len(classes) pred_method = _get_prediction_method(estimator) - preds = _get_predictions(pred_method, X[test], n_classes) + preds = _get_predictions( + pred_method, pred_method.__name__, X[test], n_classes + ) sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( @@ -493,14 +499,18 @@ def _get_prediction_method(clf): return method -def _get_predictions(pred_method, X, n_classes): - """Returns predictions for `X`. +def _get_predictions(pred_method, method_name, X, n_classes): + """Returns predictions for `X` and reshapes binary outputs to shape + (n_samples, 1). Parameters ---------- pred_method : callable Prediction method. + method_name : {'decision_function', 'predict_proba'} + The name of the method of the `pred_method` as str. + X : array-like Data used to obtain predictions. @@ -514,17 +524,16 @@ def _get_predictions(pred_method, X, n_classes): (X.shape[0], 1). """ preds = pred_method(X) - method = pred_method.__name__ - if method == 'decision_function': + if method_name == 'decision_function': if preds.ndim == 1: preds = preds[:, np.newaxis] - elif method == 'predict_proba': + elif method_name == 'predict_proba': if n_classes == 2: preds = preds[:, 1:] else: - raise RuntimeError("'method' needs to be one of 'decision_function' " - "or 'predict_proba'.") + raise RuntimeError("'method_name' needs to be one of " + "'decision_function' or 'predict_proba'.") return preds @@ -619,7 +628,9 @@ def predict_proba(self, X): """ pred_method = _get_prediction_method(self.clf) n_classes = len(self.classes) - preds = _get_predictions(pred_method, X, n_classes) + preds = _get_predictions( + pred_method, pred_method.__name__, X, n_classes + ) label_encoder = LabelEncoder().fit(self.classes) pos_class_indices = label_encoder.transform(self.clf.classes_) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 0586eac3bbb3b..4dfd27e8961ed 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -29,7 +29,8 @@ from sklearn.calibration import calibration_curve -def test_calibration(): +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration(ensemble): """Test calibration objects with isotonic and sigmoid""" n_samples = 100 X, y = make_classification(n_samples=2 * n_samples, n_features=6, @@ -47,7 +48,7 @@ def test_calibration(): clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train) prob_pos_clf = clf.predict_proba(X_test)[:, 1] - pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1) + pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble) assert_raises(ValueError, pc_clf.fit, X, y) # Naive Bayes with calibration @@ -55,7 +56,9 @@ def test_calibration(): (sparse.csr_matrix(X_train), sparse.csr_matrix(X_test))]: for method in ['isotonic', 'sigmoid']: - pc_clf = CalibratedClassifierCV(clf, method=method, cv=2) + pc_clf = CalibratedClassifierCV( + clf, method=method, cv=2, ensemble=ensemble + ) # Note that this fit overwrites the fit on the entire training # set pc_clf.fit(this_X_train, y_train, sample_weight=sw_train) @@ -94,13 +97,17 @@ def test_calibration(): # Check failure cases: # only "isotonic" and "sigmoid" should be accepted as methods - clf_invalid_method = CalibratedClassifierCV(clf, method="foo") + clf_invalid_method = CalibratedClassifierCV( + clf, method="foo", ensemble=ensemble + ) assert_raises(ValueError, clf_invalid_method.fit, X_train, 
y_train) # base-estimators should provide either decision_function or # predict_proba (most regressors, for instance, should fail) clf_base_regressor = \ - CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid") + CalibratedClassifierCV( + RandomForestRegressor(), method="sigmoid", ensemble=ensemble + ) assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train) @@ -114,21 +121,27 @@ def test_calibration_default_estimator(): assert isinstance(base_est, LinearSVC) -def test_calibration_cv_splitter(): +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_cv_splitter(ensemble): # Check when `cv` is a CV splitter X, y = make_classification(n_samples=100, n_features=6, random_state=42) splits = 5 kfold = KFold(n_splits=splits) - calib_clf = CalibratedClassifierCV(cv=kfold) + calib_clf = CalibratedClassifierCV(cv=kfold, ensemble=ensemble) assert isinstance(calib_clf.cv, KFold) assert calib_clf.cv.n_splits == splits calib_clf.fit(X, y) - assert len(calib_clf.calibrated_classifiers_) == splits + if ensemble: + assert len(calib_clf.calibrated_classifiers_) == splits + else: + assert len(calib_clf.calibrated_classifiers_) == 1 -def test_sample_weight(): +@pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) +@pytest.mark.parametrize('ensemble', [True, False]) +def test_sample_weight(method, ensemble): n_samples = 100 X, y = make_classification(n_samples=2 * n_samples, n_features=6, random_state=42) @@ -138,47 +151,51 @@ def test_sample_weight(): X[:n_samples], y[:n_samples], sample_weight[:n_samples] X_test = X[n_samples:] - for method in ['sigmoid', 'isotonic']: - base_estimator = LinearSVC(random_state=42) - calibrated_clf = CalibratedClassifierCV(base_estimator, method=method) - calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) - probs_with_sw = calibrated_clf.predict_proba(X_test) + base_estimator = LinearSVC(random_state=42) + calibrated_clf = CalibratedClassifierCV( + base_estimator, method=method, ensemble=ensemble + ) + calibrated_clf.fit(X_train, y_train, sample_weight=sw_train) + probs_with_sw = calibrated_clf.predict_proba(X_test) - # As the weights are used for the calibration, they should still yield - # a different predictions - calibrated_clf.fit(X_train, y_train) - probs_without_sw = calibrated_clf.predict_proba(X_test) + # As the weights are used for the calibration, they should still yield + # different predictions + calibrated_clf.fit(X_train, y_train) + probs_without_sw = calibrated_clf.predict_proba(X_test) - diff = np.linalg.norm(probs_with_sw - probs_without_sw) - assert diff > 0.1 + diff = np.linalg.norm(probs_with_sw - probs_without_sw) + assert diff > 0.1 -@pytest.mark.parametrize("method", ['sigmoid', 'isotonic']) -def test_parallel_execution(method): +@pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) +@pytest.mark.parametrize('ensemble', [True, False]) +def test_parallel_execution(method, ensemble): """Test parallel calibration""" X, y = make_classification(random_state=42) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) base_estimator = LinearSVC(random_state=42) - cal_clf_parallel = CalibratedClassifierCV(base_estimator, - method=method, n_jobs=2) + cal_clf_parallel = CalibratedClassifierCV( + base_estimator, method=method, n_jobs=2, ensemble=ensemble + ) cal_clf_parallel.fit(X_train, y_train) probs_parallel = cal_clf_parallel.predict_proba(X_test) - cal_clf_sequential = CalibratedClassifierCV(base_estimator, - method=method, - n_jobs=1) + cal_clf_sequential = 
CalibratedClassifierCV( + base_estimator, method=method, n_jobs=1, ensemble=ensemble + ) cal_clf_sequential.fit(X_train, y_train) probs_sequential = cal_clf_sequential.predict_proba(X_test) assert_allclose(probs_parallel, probs_sequential) -def test_calibration_multiclass(): - """Test calibration for multiclass """ - # test multi-class setting with classifier that implements - # only decision function +@pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_multiclass(method, ensemble): + """Test calibration for multiclass with classifier that implements + only decision function.""" clf = LinearSVC() X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) @@ -192,27 +209,30 @@ def test_calibration_multiclass(): X_test, y_test = X[1::2], y[1::2] clf.fit(X_train, y_train) - for method in ['isotonic', 'sigmoid']: - cal_clf = CalibratedClassifierCV(clf, method=method, cv=2) - cal_clf.fit(X_train, y_train) - probas = cal_clf.predict_proba(X_test) - assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test))) - - # Check that log-loss of calibrated classifier is smaller than - # log-loss of naively turned OvR decision function to probabilities - # via softmax - def softmax(y_pred): - e = np.exp(-y_pred) - return e / e.sum(axis=1).reshape(-1, 1) - - uncalibrated_log_loss = \ - log_loss(y_test, softmax(clf.decision_function(X_test))) - calibrated_log_loss = log_loss(y_test, probas) - assert uncalibrated_log_loss >= calibrated_log_loss + + cal_clf = CalibratedClassifierCV( + clf, method=method, cv=2, ensemble=ensemble + ) + cal_clf.fit(X_train, y_train) + probas = cal_clf.predict_proba(X_test) + # Check probabilities sum to 1 + assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test))) + + # Check that log-loss of calibrated classifier is smaller than + # log-loss obtained by naively turning OvR decision function to + # probabilities via softmax + def softmax(y_pred): + e = np.exp(-y_pred) + return e / e.sum(axis=1).reshape(-1, 1) + + uncalibrated_log_loss = \ + log_loss(y_test, softmax(clf.decision_function(X_test))) + calibrated_log_loss = log_loss(y_test, probas) + assert uncalibrated_log_loss >= calibrated_log_loss # Test that calibration of a multiclass classifier decreases log-loss # for RandomForestClassifier - X, y = make_blobs(n_samples=100, n_features=2, random_state=42, + X, y = make_blobs(n_samples=1500, n_features=2, random_state=42, cluster_std=3.0) X_train, y_train = X[::2], y[::2] X_test, y_test = X[1::2], y[1::2] @@ -222,12 +242,13 @@ def softmax(y_pred): clf_probs = clf.predict_proba(X_test) loss = log_loss(y_test, clf_probs) - for method in ['isotonic', 'sigmoid']: - cal_clf = CalibratedClassifierCV(clf, method=method, cv=3) - cal_clf.fit(X_train, y_train) - cal_clf_probs = cal_clf.predict_proba(X_test) - cal_loss = log_loss(y_test, cal_clf_probs) - assert loss > cal_loss + cal_clf = CalibratedClassifierCV( + clf, method=method, cv=3, ensemble=ensemble + ) + cal_clf.fit(X_train, y_train) + cal_clf_probs = cal_clf.predict_proba(X_test) + cal_loss = log_loss(y_test, cal_clf_probs) + assert loss > cal_loss def test_calibration_prefit(): @@ -329,7 +350,8 @@ def test_calibration_curve(): strategy='percentile') -def test_calibration_nan_imputer(): +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_nan_imputer(ensemble): """Test that calibration can accept nan""" X, y = make_classification(n_samples=10, n_features=2, 
n_informative=2, n_redundant=0, @@ -338,26 +360,32 @@ def test_calibration_nan_imputer(): clf = Pipeline( [('imputer', SimpleImputer()), ('rf', RandomForestClassifier(n_estimators=1))]) - clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic') + clf_c = CalibratedClassifierCV( + clf, cv=2, method='isotonic', ensemble=ensemble + ) clf_c.fit(X, y) clf_c.predict(X) -def test_calibration_prob_sum(): +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_prob_sum(ensemble): # Test that sum of probabilities is 1. A non-regression test for # issue #7796 num_classes = 2 X, y = make_classification(n_samples=10, n_features=5, n_classes=num_classes) clf = LinearSVC(C=1.0) - clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) + clf_prob = CalibratedClassifierCV( + clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble + ) clf_prob.fit(X, y) probs = clf_prob.predict_proba(X) assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0])) -def test_calibration_less_classes(): +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_less_classes(ensemble): # Test to check calibration works fine when train set in a test-train # split does not contain all classes # Since this test uses LOO, at each iteration train set will not contain a @@ -365,15 +393,23 @@ def test_calibration_less_classes(): X = np.random.randn(10, 5) y = np.arange(10) clf = LinearSVC(C=1.0) - cal_clf = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) + cal_clf = CalibratedClassifierCV( + clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble + ) cal_clf.fit(X, y) for i, calibrated_classifier in \ enumerate(cal_clf.calibrated_classifiers_): proba = calibrated_classifier.predict_proba(X) - assert_array_equal(proba[:, i], np.zeros(len(y))) - assert np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])) + if ensemble: + # Check 'missing' class has proba=0 + assert_array_equal(proba[:, i], np.zeros(len(y))) + # Check for all other classes proba!=0 + assert np.all(np.hstack([proba[:, :i], + proba[:, i + 1:]])) + else: + # Check `proba` all 1/n_classes + assert np.allclose(proba, 1 / proba.shape[0]) @ignore_warnings(category=FutureWarning) From 82b885ffdd631519177d41de3c8c1e6c6199c4c7 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 29 Jul 2020 16:37:16 +0200 Subject: [PATCH 21/44] lint --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 1d61de8b056bf..7d0769cbc2f74 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -321,7 +321,7 @@ def fit(self, X, y, sample_weight=None): else: n_folds = None if n_folds and np.any([np.sum(y == class_) < n_folds - for class_ in self.classes_]): + for class_ in self.classes_]): raise ValueError(f"Requesting {n_folds}-fold " "cross-validation but provided less than " f"{n_folds} examples for at least one class.") From 1fa0b3161ad283e0a71c6e800abd8493ab36c561 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 29 Jul 2020 17:11:04 +0200 Subject: [PATCH 22/44] use kwarg in cross val predict --- sklearn/calibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 7d0769cbc2f74..6067522cb6d58 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -341,8 +341,8 @@ def fit(self, X, y, sample_weight=None): this_estimator = clone(base_estimator) method_name = _get_prediction_method(this_estimator).__name__ pred_method = 
partial( - cross_val_predict, this_estimator, X, y, cv=cv, - method=method_name + cross_val_predict, estimator=this_estimator, X=X, y=y, + cv=cv, method=method_name ) preds = _get_predictions( pred_method, method_name, X, n_classes From aaa87930686b1c7d073b963d7636b337e4f074eb Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 29 Jul 2020 17:54:20 +0200 Subject: [PATCH 23/44] wip --- sklearn/calibration.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 6067522cb6d58..06d981a472ac3 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -341,11 +341,11 @@ def fit(self, X, y, sample_weight=None): this_estimator = clone(base_estimator) method_name = _get_prediction_method(this_estimator).__name__ pred_method = partial( - cross_val_predict, estimator=this_estimator, X=X, y=y, - cv=cv, method=method_name + cross_val_predict, this_estimator, X, y, + cv, method=method_name ) preds = _get_predictions( - pred_method, method_name, X, n_classes + pred_method, method_name, X=None, n_classes=n_classes ) if sample_weight is not None and supports_sw: @@ -511,8 +511,9 @@ def _get_predictions(pred_method, method_name, X, n_classes): method_name : {'decision_function', 'predict_proba'} The name of the method of the `pred_method` as str. - X : array-like - Data used to obtain predictions. + X : array-like or None + Data used to obtain predictions. If `None`, just call `pred_method` + to obtain predictions. n_classes : int Number of classes present. @@ -523,7 +524,10 @@ def _get_predictions(pred_method, method_name, X, n_classes): The predictions. Note if there are 2 classes, array is of shape (X.shape[0], 1). """ - preds = pred_method(X) + if X is None: + preds = pred_method() + else: + preds = pred_method(X) if method_name == 'decision_function': if preds.ndim == 1: From 2282478a4441941f9df34b54c7b142ab8e59a147 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 29 Jul 2020 18:09:43 +0200 Subject: [PATCH 24/44] kwarg cv --- sklearn/calibration.py | 14 +++++--------- sklearn/tests/test_calibration.py | 8 ++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 06d981a472ac3..008c9938ef71c 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -341,11 +341,11 @@ def fit(self, X, y, sample_weight=None): this_estimator = clone(base_estimator) method_name = _get_prediction_method(this_estimator).__name__ pred_method = partial( - cross_val_predict, this_estimator, X, y, - cv, method=method_name + cross_val_predict, this_estimator, X, y, cv=cv, + method=method_name ) preds = _get_predictions( - pred_method, method_name, X=None, n_classes=n_classes + pred_method, method_name, X, n_classes=n_classes ) if sample_weight is not None and supports_sw: @@ -512,8 +512,7 @@ def _get_predictions(pred_method, method_name, X, n_classes): The name of the method of the `pred_method` as str. X : array-like or None - Data used to obtain predictions. If `None`, just call `pred_method` - to obtain predictions. + Data used to obtain predictions. n_classes : int Number of classes present. @@ -524,10 +523,7 @@ def _get_predictions(pred_method, method_name, X, n_classes): The predictions. Note if there are 2 classes, array is of shape (X.shape[0], 1). 
""" - if X is None: - preds = pred_method() - else: - preds = pred_method(X) + preds = pred_method(X) if method_name == 'decision_function': if preds.ndim == 1: diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 4dfd27e8961ed..7242c18dbf0fa 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -29,6 +29,14 @@ from sklearn.calibration import calibration_curve +def test_ll(): + X, y = make_classification(n_samples=10, n_features=6, + random_state=42) + pc_clf = CalibratedClassifierCV( + cv=2, ensemble=False + ) + pc_clf.fit(X,y) + @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration(ensemble): """Test calibration objects with isotonic and sigmoid""" From ec8158049014539278b441b0a3e7ee4eca38bc52 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 10:54:28 +0200 Subject: [PATCH 25/44] fix kwargs partial --- sklearn/calibration.py | 6 +++--- sklearn/tests/test_calibration.py | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 008c9938ef71c..bcae69fa2cde5 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -341,8 +341,8 @@ def fit(self, X, y, sample_weight=None): this_estimator = clone(base_estimator) method_name = _get_prediction_method(this_estimator).__name__ pred_method = partial( - cross_val_predict, this_estimator, X, y, cv=cv, - method=method_name + cross_val_predict, estimator=this_estimator, X=X, y=y, + cv=cv, method=method_name ) preds = _get_predictions( pred_method, method_name, X, n_classes=n_classes @@ -523,7 +523,7 @@ def _get_predictions(pred_method, method_name, X, n_classes): The predictions. Note if there are 2 classes, array is of shape (X.shape[0], 1). 
""" - preds = pred_method(X) + preds = pred_method(X=X) if method_name == 'decision_function': if preds.ndim == 1: diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 7242c18dbf0fa..4dfd27e8961ed 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -29,14 +29,6 @@ from sklearn.calibration import calibration_curve -def test_ll(): - X, y = make_classification(n_samples=10, n_features=6, - random_state=42) - pc_clf = CalibratedClassifierCV( - cv=2, ensemble=False - ) - pc_clf.fit(X,y) - @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration(ensemble): """Test calibration objects with isotonic and sigmoid""" From 1250cc56ca888fb39fea5620c697d985f0075a07 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 12:56:50 +0200 Subject: [PATCH 26/44] use signature get pred --- sklearn/calibration.py | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index bcae69fa2cde5..b068990094757 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -282,9 +282,7 @@ def fit(self, X, y, sample_weight=None): pred_method = _get_prediction_method(base_estimator) n_classes = len(self.classes_) - preds = _get_predictions( - pred_method, pred_method.__name__, X, n_classes - ) + preds = _get_predictions(pred_method,X, n_classes) calibrated_classifier = _fit_calibrator( base_estimator, preds, y, self.classes_, self.method, @@ -344,9 +342,7 @@ def fit(self, X, y, sample_weight=None): cross_val_predict, estimator=this_estimator, X=X, y=y, cv=cv, method=method_name ) - preds = _get_predictions( - pred_method, method_name, X, n_classes=n_classes - ) + preds = _get_predictions(pred_method, X, n_classes) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) @@ -462,9 +458,7 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, n_classes = len(classes) pred_method = _get_prediction_method(estimator) - preds = _get_predictions( - pred_method, pred_method.__name__, X[test], n_classes - ) + preds = _get_predictions(pred_method, X[test], n_classes) sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( @@ -489,17 +483,17 @@ def _get_prediction_method(clf): prediction_method : callable The prediction method. """ - if hasattr(clf, "decision_function"): - method = getattr(clf, "decision_function") - elif hasattr(clf, "predict_proba"): - method = getattr(clf, "predict_proba") + if hasattr(clf, 'decision_function'): + method = getattr(clf, 'decision_function') + elif hasattr(clf, 'predict_proba'): + method = getattr(clf, 'predict_proba') else: raise RuntimeError("'base_estimator' has no 'decision_function' or " "'predict_proba' method.") return method -def _get_predictions(pred_method, method_name, X, n_classes): +def _get_predictions(pred_method, X, n_classes): """Returns predictions for `X` and reshapes binary outputs to shape (n_samples, 1). @@ -508,9 +502,6 @@ def _get_predictions(pred_method, method_name, X, n_classes): pred_method : callable Prediction method. - method_name : {'decision_function', 'predict_proba'} - The name of the method of the `pred_method` as str. - X : array-like or None Data used to obtain predictions. @@ -524,15 +515,19 @@ def _get_predictions(pred_method, method_name, X, n_classes): (X.shape[0], 1). 
""" preds = pred_method(X=X) + if hasattr(pred_method, '__name__'): + method = pred_method.__name__ + else: + method = signature(pred_method).parameters['method'].default - if method_name == 'decision_function': + if method == 'decision_function': if preds.ndim == 1: preds = preds[:, np.newaxis] - elif method_name == 'predict_proba': + elif method == 'predict_proba': if n_classes == 2: preds = preds[:, 1:] else: - raise RuntimeError("'method_name' needs to be one of " + raise RuntimeError("'pred_method' needs to be one of " "'decision_function' or 'predict_proba'.") return preds @@ -626,11 +621,10 @@ def predict_proba(self, X): proba : array, shape (n_samples, n_classes) The predicted probabilities. Can be exact zeros. """ - pred_method = _get_prediction_method(self.clf) n_classes = len(self.classes) - preds = _get_predictions( - pred_method, pred_method.__name__, X, n_classes - ) + pred_method = _get_prediction_method(self.clf) + preds = _get_predictions(pred_method, X, n_classes) + label_encoder = LabelEncoder().fit(self.classes) pos_class_indices = label_encoder.transform(self.clf.classes_) From 6dcac0c8dadabe8b0ba03f6b3ccf45f65b972da0 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 14:30:19 +0200 Subject: [PATCH 27/44] use signature get pred --- sklearn/calibration.py | 16 +++++++++------- sklearn/tests/test_calibration.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index b068990094757..4aa9d89b16acd 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -282,7 +282,7 @@ def fit(self, X, y, sample_weight=None): pred_method = _get_prediction_method(base_estimator) n_classes = len(self.classes_) - preds = _get_predictions(pred_method,X, n_classes) + preds = _get_predictions(pred_method, X, n_classes) calibrated_classifier = _fit_calibrator( base_estimator, preds, y, self.classes_, self.method, @@ -334,7 +334,8 @@ def fit(self, X, y, sample_weight=None): clone(base_estimator), X, y, train=train, test=test, method=self.method, classes=self.classes_, supports_sw=supports_sw, sample_weight=sample_weight) - for train, test in cv.split(X, y)) + for train, test in cv.split(X, y) + ) else: this_estimator = clone(base_estimator) method_name = _get_prediction_method(this_estimator).__name__ @@ -342,7 +343,7 @@ def fit(self, X, y, sample_weight=None): cross_val_predict, estimator=this_estimator, X=X, y=y, cv=cv, method=method_name ) - preds = _get_predictions(pred_method, X, n_classes) + preds = (pred_method, X, n_classes) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) @@ -419,7 +420,7 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, Parameters ---------- - estimator : Estimator instance + estimator : estimator instance Cloned base estimator. X : array-like, shape (n_samples, n_features) @@ -541,7 +542,7 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): Parameters ---------- - clf : Estimator instance + clf : estimator instance Fitted classifier. 
preds : array-like, shape (n_samples, n_classes) or (n_samples, 1) when \ @@ -567,6 +568,7 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): Y = label_binarize(y, classes=classes) label_encoder = LabelEncoder().fit(classes) pos_class_indices = label_encoder.transform(clf.classes_) + print(f'pos class indix fit calb {pos_class_indices}') calibrators = [] for class_idx, this_pred in zip(pos_class_indices, preds.T): if method == 'isotonic': @@ -588,10 +590,10 @@ class _CalibratedClassiferPipeline: Parameters ---------- - clf : Estimator instance + clf : estimator instance Fitted classifier. - calibrators : List of fitted estimator instances + calibrators : list of fitted estimator instances List of fitted calibrators (either 'IsotonicRegression' or '_SigmoidCalibration'). The number of calibrators equals the number of classes. However, if there are 2 classes, the list contains only one diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 4dfd27e8961ed..7b568fef69ee8 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -16,10 +16,11 @@ from sklearn.exceptions import NotFittedError from sklearn.datasets import make_classification, make_blobs from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import KFold +from sklearn.model_selection import KFold, cross_val_predict from sklearn.naive_bayes import MultinomialNB from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.svm import LinearSVC +from sklearn.isotonic import IsotonicRegression from sklearn.feature_extraction import DictVectorizer from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer @@ -297,6 +298,34 @@ def test_calibration_prefit(): brier_score_loss(y_test, prob_pos_pc_clf)) +@pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) +def test_calibration_ensemble(method): + """Test that `ensemble=False` is the same as using predictions from + `cross_val_predict` to train calibrator.""" + X, y = make_classification(n_samples=100, n_features=6, random_state=7) + clf = LinearSVC(random_state=7) + + cal_clf = CalibratedClassifierCV(clf, method=method, cv=3) + cal_clf.fit(X, y) + cal_probas = cal_clf.predict_proba(X[:10, :]) + # print(f'calib proba\n{cal_probas}') + + unbiased_preds = cross_val_predict( + clf, X, y, cv=3, method='decision_function' + ) + if method == 'isotonic': + calibrator = IsotonicRegression(out_of_bounds='clip') + else: + calibrator = _SigmoidCalibration() + # print(f'unbiased pred shape {unbiased_preds.shape}') + calibrator.fit(unbiased_preds, y[:, 1]) + # Fit `clf` using all data + clf.fit(X, y) + clf_probas = clf.decision_function(X[:10, :]) + manual_probas = calibrator.predict(clf_probas[:, 0]) + # print(f'man proba\n{manual_probas}') + + def test_sigmoid_calibration(): """Test calibration values with Platt sigmoid model""" exF = np.array([5, -4, 1.0]) From 5e5f53bbf997803105214c68c4f1cb8b9e25bfa3 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 14:52:18 +0200 Subject: [PATCH 28/44] add test ensemble --- sklearn/calibration.py | 7 +++---- sklearn/tests/test_calibration.py | 17 ++++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 4aa9d89b16acd..efbb66ddd0660 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -343,7 +343,7 @@ def fit(self, X, y, sample_weight=None): cross_val_predict, estimator=this_estimator, X=X, y=y, 
cv=cv, method=method_name ) - preds = (pred_method, X, n_classes) + preds = _get_predictions(pred_method, X, n_classes) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) @@ -545,8 +545,8 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): clf : estimator instance Fitted classifier. - preds : array-like, shape (n_samples, n_classes) or (n_samples, 1) when \ - binary. + preds : array-like, shape (n_samples, n_classes) or (n_samples, 1) when \ + binary. Predictions for calibrating the predictions. y : array-like, shape (n_samples,) @@ -568,7 +568,6 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): Y = label_binarize(y, classes=classes) label_encoder = LabelEncoder().fit(classes) pos_class_indices = label_encoder.transform(clf.classes_) - print(f'pos class indix fit calb {pos_class_indices}') calibrators = [] for class_idx, this_pred in zip(pos_class_indices, preds.T): if method == 'isotonic': diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 7b568fef69ee8..4a449b75b08c4 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -299,17 +299,17 @@ def test_calibration_prefit(): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) -def test_calibration_ensemble(method): +def test_calibration_ensemble_false(method): """Test that `ensemble=False` is the same as using predictions from `cross_val_predict` to train calibrator.""" X, y = make_classification(n_samples=100, n_features=6, random_state=7) clf = LinearSVC(random_state=7) - cal_clf = CalibratedClassifierCV(clf, method=method, cv=3) + cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False) cal_clf.fit(X, y) cal_probas = cal_clf.predict_proba(X[:10, :]) - # print(f'calib proba\n{cal_probas}') + # Get probas manually unbiased_preds = cross_val_predict( clf, X, y, cv=3, method='decision_function' ) @@ -317,13 +317,12 @@ def test_calibration_ensemble(method): calibrator = IsotonicRegression(out_of_bounds='clip') else: calibrator = _SigmoidCalibration() - # print(f'unbiased pred shape {unbiased_preds.shape}') - calibrator.fit(unbiased_preds, y[:, 1]) - # Fit `clf` using all data + calibrator.fit(unbiased_preds, y) + # Use `clf` fit on all data clf.fit(X, y) - clf_probas = clf.decision_function(X[:10, :]) - manual_probas = calibrator.predict(clf_probas[:, 0]) - # print(f'man proba\n{manual_probas}') + clf_df = clf.decision_function(X[:10, :]) + manual_probas = calibrator.predict(clf_df) + assert_allclose(cal_probas[:, 1], manual_probas) def test_sigmoid_calibration(): From 9c89a5da45ce13efd2892f83e50bfde1c31702c0 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 14:54:56 +0200 Subject: [PATCH 29/44] set rand state svc --- sklearn/tests/test_calibration.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 4a449b75b08c4..475eca507e8f4 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -197,7 +197,7 @@ def test_parallel_execution(method, ensemble): def test_calibration_multiclass(method, ensemble): """Test calibration for multiclass with classifier that implements only decision function.""" - clf = LinearSVC() + clf = LinearSVC(random_state=7) X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) @@ -402,7 +402,7 @@ def test_calibration_prob_sum(ensemble): num_classes = 2 X, y = 
make_classification(n_samples=10, n_features=5, n_classes=num_classes) - clf = LinearSVC(C=1.0) + clf = LinearSVC(C=1.0, random_state=7) clf_prob = CalibratedClassifierCV( clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble ) @@ -420,7 +420,7 @@ def test_calibration_less_classes(ensemble): # class label X = np.random.randn(10, 5) y = np.arange(10) - clf = LinearSVC(C=1.0) + clf = LinearSVC(C=1.0, random_state=7) cal_clf = CalibratedClassifierCV( clf, method="sigmoid", cv=LeaveOneOut(), ensemble=ensemble ) From 06c0088f75927fe874f1ba2343147ab5bd1bd8d6 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 15:23:56 +0200 Subject: [PATCH 30/44] update docs --- doc/modules/calibration.rst | 34 +++++++++++++++++++++------------- sklearn/calibration.py | 7 +++---- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index cfc185c854edb..78edf290b6751 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -96,8 +96,8 @@ in [0, 1]. Denoting the output of the classifier for a given sample by :math:`f_ the calibrator tries to predict :math:`p(y_i = 1 | f_i)`. The samples that are used to fit the calibrator should not be the same -samples used to fit the classifier, as this would -introduce bias. The classifier performance on its training data would be +samples used to fit the classifier, as this would introduce bias. +This is because performance of the classifier on its training data would be better than for novel data. Using the classifier output from training data to fit the calibrator would thus result in a biased calibrator that maps to probabilities closer to 0 and 1 than it should. @@ -107,22 +107,30 @@ Usage The :class:`CalibratedClassifierCV` class is used to calibrate a classifier. -:class:`CalibratedClassifierCV` uses a cross-validation approach to fit both -the classifier and the regressor. The data is split into k -`(train_set, test_set)` couples (as determined by `cv`). The classifier -(`base_estimator`) is trained on the train set, and its predictions on the -test set are used to fit a regressor. This ensures that the data used to fit -the classifier is always disjoint from the data used to fit the calibrator. -After fitting, we end up with k -`(classifier, regressor)` couples where each regressor maps the output of +:class:`CalibratedClassifierCV` uses a cross-validation approach to ensure +unbiased data is always used to fit the calibrator. The data is split into k +`(train_set, test_set)` couples (as determined by `cv`). When `ensemble=True` +(default), the classifier (`base_estimator`) is trained on the train set, and +its predictions on the test set are used to fit the calibrator (either a +sigmoid or isotonic regressor). After fitting, we end up with an ensemble of +k `(classifier, calibrator)` couples where each calibrator maps the output of its corresponding classifier into [0, 1]. Each couple is exposed in the `calibrated_classifiers_` attribute, where each entry is a calibrated classifier with a :term:`predict_proba` method that outputs calibrated probabilities. The output of :term:`predict_proba` for the main :class:`CalibratedClassifierCV` instance corresponds to the average of the -predicted probabilities of the `k` estimators in the -`calibrated_classifiers_` list. The output of :term:`predict` is the class -that has the highest probability. +predicted probabilities of the `k` estimators in the `calibrated_classifiers_` +list. 
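
A toy illustration of the averaging described in this paragraph, with made-up per-fold probabilities (two samples, three classes, k=2 couples):

    import numpy as np

    fold_probas = [np.array([[0.7, 0.2, 0.1], [0.1, 0.6, 0.3]]),
                   np.array([[0.5, 0.4, 0.1], [0.2, 0.5, 0.3]])]
    mean_proba = np.mean(fold_probas, axis=0)  # what predict_proba averages
    print(mean_proba)                          # rows still sum to 1
    print(mean_proba.argmax(axis=1))           # what predict reports
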
The output of :term:`predict` is the class that has the highest +probability. + +When `ensemble=False`, cross-validation is used to obtain 'unbiased' +predictions for all the data, by concatenating the testing subset predictions. +These unbiased predictions are used to train the calibrator. The attribute +`calibrated_classifiers_` consists of only one `(classifier, calibrator)` +couple where the classifier is the `base_estimator` trained on all the data. +In this case the output of :term:`predict_proba` for +:class:`CalibratedClassifierCV` is the predicted probabilities obtained +from the single `(classifier, calibrator)` couple. Alternatively an already fitted classifier can be calibrated by setting `cv="prefit"`. In this case, the data is not split and all of it is used to diff --git a/sklearn/calibration.py b/sklearn/calibration.py index efbb66ddd0660..060882fb3c4c0 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -46,10 +46,9 @@ class CalibratedClassifierCV(ClassifierMixin, using the testing subset. For prediction, predicted probabilities are averaged across these individual calibrated classifiers. When `ensemble=False`, cross-validation is used to obtain unbiased predictions, - from the testing subset. These are concatenated together and used for - calibration. For prediction, the base estimator, trained using all the - data, is used. This is the method implemented when `probabilities=True` for - :mod:`sklearn.svm` estimators. + from the testing subset, which are used for calibration. For prediction, + the base estimator, trained using all the data, is used. This is the method + implemented when `probabilities=True` for :mod:`sklearn.svm` estimators. Already fitted classifiers can be calibrated via the parameter `cv="prefit"`. In this case, no cross-validation is used and all provided From 7c873e968f00c39a0021e68f8130b3bdcf69bb7c Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 17:56:41 +0200 Subject: [PATCH 31/44] suggestions --- doc/modules/calibration.rst | 5 ++-- sklearn/calibration.py | 48 +++++++++++++++++-------------- sklearn/tests/test_calibration.py | 16 +++++------ 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index 78edf290b6751..494ff69b4c299 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -124,8 +124,9 @@ list. The output of :term:`predict` is the class that has the highest probability. When `ensemble=False`, cross-validation is used to obtain 'unbiased' -predictions for all the data, by concatenating the testing subset predictions. -These unbiased predictions are used to train the calibrator. The attribute +predictions for all the data, via +:func:`~sklearn.model_selection.cross_val_predict`. +These unbiased predictions are then used to train the calibrator. The attribute `calibrated_classifiers_` consists of only one `(classifier, calibrator)` couple where the classifier is the `base_estimator` trained on all the data. In this case the output of :term:`predict_proba` for diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 060882fb3c4c0..392388b91ce3b 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -46,9 +46,10 @@ class CalibratedClassifierCV(ClassifierMixin, using the testing subset. For prediction, predicted probabilities are averaged across these individual calibrated classifiers. 
When `ensemble=False`, cross-validation is used to obtain unbiased predictions, - from the testing subset, which are used for calibration. For prediction, - the base estimator, trained using all the data, is used. This is the method - implemented when `probabilities=True` for :mod:`sklearn.svm` estimators. + via :func:`~sklearn.model_selection.cross_val_predict`, which are then + used for calibration. For prediction, the base estimator, trained using all + the data, is used. This is the method implemented when `probabilities=True` + for :mod:`sklearn.svm` estimators. Already fitted classifiers can be calibrated via the parameter `cv="prefit"`. In this case, no cross-validation is used and all provided @@ -75,7 +76,7 @@ class CalibratedClassifierCV(ClassifierMixin, ``(<<1000)`` since it tends to overfit. cv : integer, cross-validation generator, iterable or "prefit", \ - default=None + default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: @@ -144,10 +145,10 @@ class CalibratedClassifierCV(ClassifierMixin, `n_cv` is the number of cross-validation folds. The output is the average predicted probabilities of all pairs. - If `False`, `cv` is used to compute unbiased predictions (with the - test fold), which are concatenated and used to train the calibrator - (sigmoid or isotonic model). At prediction time, the classifier used - is the `base_estimator` trained on all the data. + If `False`, `cv` is used to compute unbiased predictions, via + :func:`~sklearn.model_selection.cross_val_predict`, which are then + used for calibration. At prediction time, the classifier used is the + `base_estimator` trained on all the data. Note this method is implemented when `probabilities=True` for :mod:`sklearn.svm` estimators. @@ -340,7 +341,8 @@ def fit(self, X, y, sample_weight=None): method_name = _get_prediction_method(this_estimator).__name__ pred_method = partial( cross_val_predict, estimator=this_estimator, X=X, y=y, - cv=cv, method=method_name + cv=cv, method=method_name, n_jobs=self.n_jobs, + verbose=self.verbose, pre_dispatch=self.pre_dispatch ) preds = _get_predictions(pred_method, X, n_classes) @@ -415,7 +417,7 @@ def _more_tags(self): def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, method, classes, sample_weight=None): - """Get predictions and fit calibrator for a given dataset split. + """Compute predictions and fit a calibrator for a given dataset split. Parameters ---------- @@ -494,7 +496,7 @@ def _get_prediction_method(clf): def _get_predictions(pred_method, X, n_classes): - """Returns predictions for `X` and reshapes binary outputs to shape + """Return predictions for `X` and reshape binary outputs to shape (n_samples, 1). Parameters @@ -516,19 +518,19 @@ def _get_predictions(pred_method, X, n_classes): """ preds = pred_method(X=X) if hasattr(pred_method, '__name__'): - method = pred_method.__name__ + method_name = pred_method.__name__ else: - method = signature(pred_method).parameters['method'].default + method_name = signature(pred_method).parameters['method'].default - if method == 'decision_function': + if method_name == 'decision_function': if preds.ndim == 1: preds = preds[:, np.newaxis] - elif method == 'predict_proba': + elif method_name == 'predict_proba': if n_classes == 2: preds = preds[:, 1:] - else: - raise RuntimeError("'pred_method' needs to be one of " - "'decision_function' or 'predict_proba'.") + else: # pragma: no cover + # this branch should be unreachable. 
+ raise ValueError return preds @@ -579,7 +581,9 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): calibrator.fit(this_pred, Y[:, class_idx], sample_weight) calibrators.append(calibrator) - pipeline = _CalibratedClassiferPipeline(clf, calibrators, classes) + pipeline = _CalibratedClassiferPipeline( + clf, calibrators, method=method, classes + ) return pipeline @@ -600,9 +604,11 @@ class _CalibratedClassiferPipeline: classes : ndarray, shape (n_classes,) All the prediction classes. """ - def __init__(self, clf, calibrators, classes): - self.clf = clf + def __init__(self, base_estimator, calibrators, *, method='sigmoid', + classes): + self.base_estimator = base_estimator self.calibrators = calibrators + self.method = method self.classes = classes def predict_proba(self, X): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 475eca507e8f4..d6bad9da0cb73 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -118,7 +118,7 @@ def test_calibration_default_estimator(): calib_clf = CalibratedClassifierCV(cv=2) calib_clf.fit(X, y) - base_est = calib_clf.calibrated_classifiers_[0].clf + base_est = calib_clf.calibrated_classifiers_[0].base_estimator assert isinstance(base_est, LinearSVC) @@ -195,8 +195,8 @@ def test_parallel_execution(method, ensemble): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration_multiclass(method, ensemble): - """Test calibration for multiclass with classifier that implements - only decision function.""" + # Test calibration for multiclass with classifier that implements + # only decision function.""" clf = LinearSVC(random_state=7) X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) @@ -300,14 +300,14 @@ def test_calibration_prefit(): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) def test_calibration_ensemble_false(method): - """Test that `ensemble=False` is the same as using predictions from - `cross_val_predict` to train calibrator.""" + # Test that `ensemble=False` is the same as using predictions from + # `cross_val_predict` to train calibrator. 
X, y = make_classification(n_samples=100, n_features=6, random_state=7) clf = LinearSVC(random_state=7) cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False) cal_clf.fit(X, y) - cal_probas = cal_clf.predict_proba(X[:10, :]) + cal_probas = cal_clf.predict_proba(X) # Get probas manually unbiased_preds = cross_val_predict( @@ -320,7 +320,7 @@ def test_calibration_ensemble_false(method): calibrator.fit(unbiased_preds, y) # Use `clf` fit on all data clf.fit(X, y) - clf_df = clf.decision_function(X[:10, :]) + clf_df = clf.decision_function(X) manual_probas = calibrator.predict(clf_df) assert_allclose(cal_probas[:, 1], manual_probas) @@ -436,7 +436,7 @@ def test_calibration_less_classes(ensemble): assert np.all(np.hstack([proba[:, :i], proba[:, i + 1:]])) else: - # Check `proba` all 1/n_classes + # Check `proba` are all 1/n_classes assert np.allclose(proba, 1 / proba.shape[0]) From 3495343314546eb10322b11d87d9af498753774a Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 18:41:20 +0200 Subject: [PATCH 32/44] param test_calib, break into 3 tests, add data fixture --- sklearn/calibration.py | 8 +- sklearn/tests/test_calibration.py | 131 ++++++++++++++++-------------- 2 files changed, 77 insertions(+), 62 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 392388b91ce3b..c5fc21906ec83 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -582,7 +582,7 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): calibrators.append(calibrator) pipeline = _CalibratedClassiferPipeline( - clf, calibrators, method=method, classes + clf, calibrators, method=method, classes=classes ) return pipeline @@ -628,11 +628,13 @@ def predict_proba(self, X): The predicted probabilities. Can be exact zeros. 
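
The predict-time chaining this class implements amounts to the following binary-case sketch; fitting the calibrator on the classifier's own training data is biased and done here only to keep the example short:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.isotonic import IsotonicRegression
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)
    clf = LinearSVC(random_state=0).fit(X, y)
    calibrator = IsotonicRegression(out_of_bounds='clip')
    calibrator.fit(clf.decision_function(X), y)

    pos = calibrator.predict(clf.decision_function(X[:3]))
    proba = np.column_stack([1 - pos, pos])  # isotonic can output exact 0/1
    print(proba.round(3))
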
""" n_classes = len(self.classes) - pred_method = _get_prediction_method(self.clf) + pred_method = _get_prediction_method(self.base_estimator) preds = _get_predictions(pred_method, X, n_classes) label_encoder = LabelEncoder().fit(self.classes) - pos_class_indices = label_encoder.transform(self.clf.classes_) + pos_class_indices = label_encoder.transform( + self.base_estimator.classes_ + ) proba = np.zeros((X.shape[0], n_classes)) for class_idx, this_pred, calibrator in \ diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index d6bad9da0cb73..ec37285e0faa0 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -30,12 +30,20 @@ from sklearn.calibration import calibration_curve +@pytest.fixture(scope="module") +def data(): + X, y = make_classification( + n_samples=200, n_features=6, random_state=42 + ) + return X, y + + +@pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) @pytest.mark.parametrize('ensemble', [True, False]) -def test_calibration(ensemble): - """Test calibration objects with isotonic and sigmoid""" +def test_calibration(data, method, ensemble): + # Test calibration objects with isotonic and sigmoid n_samples = 100 - X, y = make_classification(n_samples=2 * n_samples, n_features=6, - random_state=42) + X, y = data sample_weight = np.random.RandomState(seed=42).uniform(size=y.size) X -= X.min() # MultinomialNB only allows positive X @@ -56,65 +64,71 @@ def test_calibration(ensemble): for this_X_train, this_X_test in [(X_train, X_test), (sparse.csr_matrix(X_train), sparse.csr_matrix(X_test))]: - for method in ['isotonic', 'sigmoid']: - pc_clf = CalibratedClassifierCV( - clf, method=method, cv=2, ensemble=ensemble - ) - # Note that this fit overwrites the fit on the entire training - # set - pc_clf.fit(this_X_train, y_train, sample_weight=sw_train) - prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] - - # Check that brier score has improved after calibration - assert (brier_score_loss(y_test, prob_pos_clf) > - brier_score_loss(y_test, prob_pos_pc_clf)) - # Check invariance against relabeling [0, 1] -> [1, 2] - pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) - prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] + pc_clf = CalibratedClassifierCV( + clf, method=method, cv=2, ensemble=ensemble + ) + # Note that this fit overwrites the fit on the entire training + # set + pc_clf.fit(this_X_train, y_train, sample_weight=sw_train) + prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] + + # Check that brier score has improved after calibration + assert (brier_score_loss(y_test, prob_pos_clf) > + brier_score_loss(y_test, prob_pos_pc_clf)) + + # Check invariance against relabeling [0, 1] -> [1, 2] + pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) + prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] + assert_array_almost_equal(prob_pos_pc_clf, + prob_pos_pc_clf_relabeled) + + # Check invariance against relabeling [0, 1] -> [-1, 1] + pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) + prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] + assert_array_almost_equal(prob_pos_pc_clf, + prob_pos_pc_clf_relabeled) + + # Check invariance against relabeling [0, 1] -> [1, 0] + pc_clf.fit(this_X_train, (y_train + 1) % 2, + sample_weight=sw_train) + prob_pos_pc_clf_relabeled = \ + pc_clf.predict_proba(this_X_test)[:, 1] + if method == "sigmoid": assert_array_almost_equal(prob_pos_pc_clf, - prob_pos_pc_clf_relabeled) + 1 - 
prob_pos_pc_clf_relabeled) + else: + # Isotonic calibration is not invariant against relabeling + # but should improve in both cases + assert (brier_score_loss(y_test, prob_pos_clf) > + brier_score_loss((y_test + 1) % 2, + prob_pos_pc_clf_relabeled)) - # Check invariance against relabeling [0, 1] -> [-1, 1] - pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) - prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] - assert_array_almost_equal(prob_pos_pc_clf, - prob_pos_pc_clf_relabeled) - - # Check invariance against relabeling [0, 1] -> [1, 0] - pc_clf.fit(this_X_train, (y_train + 1) % 2, - sample_weight=sw_train) - prob_pos_pc_clf_relabeled = \ - pc_clf.predict_proba(this_X_test)[:, 1] - if method == "sigmoid": - assert_array_almost_equal(prob_pos_pc_clf, - 1 - prob_pos_pc_clf_relabeled) - else: - # Isotonic calibration is not invariant against relabeling - # but should improve in both cases - assert (brier_score_loss(y_test, prob_pos_clf) > - brier_score_loss((y_test + 1) % 2, - prob_pos_pc_clf_relabeled)) - # Check failure cases: - # only "isotonic" and "sigmoid" should be accepted as methods +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_bad_method(data, ensemble): + # Check only "isotonic" and "sigmoid" are accepted as methods + X, y = data + clf = LinearSVC() clf_invalid_method = CalibratedClassifierCV( clf, method="foo", ensemble=ensemble ) - assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train) + assert_raises(ValueError, clf_invalid_method.fit, X, y) + - # base-estimators should provide either decision_function or +@pytest.mark.parametrize('ensemble', [True, False]) +def test_calibration_regressor(data, ensemble): + # `base-estimator` should provide either decision_function or # predict_proba (most regressors, for instance, should fail) + X, y = data clf_base_regressor = \ - CalibratedClassifierCV( - RandomForestRegressor(), method="sigmoid", ensemble=ensemble - ) - assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train) + CalibratedClassifierCV(RandomForestRegressor(), ensemble=ensemble) + assert_raises(RuntimeError, clf_base_regressor.fit, X, y) -def test_calibration_default_estimator(): +def test_calibration_default_estimator(data): # Check base_estimator default is LinearSVC - X, y = make_classification(n_samples=100, n_features=6, random_state=42) + X, y = data calib_clf = CalibratedClassifierCV(cv=2) calib_clf.fit(X, y) @@ -123,9 +137,9 @@ def test_calibration_default_estimator(): @pytest.mark.parametrize('ensemble', [True, False]) -def test_calibration_cv_splitter(ensemble): +def test_calibration_cv_splitter(data, ensemble): # Check when `cv` is a CV splitter - X, y = make_classification(n_samples=100, n_features=6, random_state=42) + X, y = data splits = 5 kfold = KFold(n_splits=splits) @@ -142,10 +156,9 @@ def test_calibration_cv_splitter(ensemble): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) @pytest.mark.parametrize('ensemble', [True, False]) -def test_sample_weight(method, ensemble): +def test_sample_weight(data, method, ensemble): n_samples = 100 - X, y = make_classification(n_samples=2 * n_samples, n_features=6, - random_state=42) + X, y = data sample_weight = np.random.RandomState(seed=42).uniform(size=len(y)) X_train, y_train, sw_train = \ @@ -170,9 +183,9 @@ def test_sample_weight(method, ensemble): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) @pytest.mark.parametrize('ensemble', [True, False]) -def test_parallel_execution(method, ensemble): +def 
test_parallel_execution(data, method, ensemble): """Test parallel calibration""" - X, y = make_classification(random_state=42) + X, y = data X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) base_estimator = LinearSVC(random_state=42) @@ -299,10 +312,10 @@ def test_calibration_prefit(): @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) -def test_calibration_ensemble_false(method): +def test_calibration_ensemble_false(data, method): # Test that `ensemble=False` is the same as using predictions from # `cross_val_predict` to train calibrator. - X, y = make_classification(n_samples=100, n_features=6, random_state=7) + X, y = data clf = LinearSVC(random_state=7) cal_clf = CalibratedClassifierCV(clf, method=method, cv=3, ensemble=False) From 641fad32133b65af488fc06c1acbdde3a53c6c9f Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 30 Jul 2020 18:45:56 +0200 Subject: [PATCH 33/44] lint --- sklearn/tests/test_calibration.py | 42 ++++++++++++++----------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index ec37285e0faa0..86690d03bdc2e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -64,7 +64,6 @@ def test_calibration(data, method, ensemble): for this_X_train, this_X_test in [(X_train, X_test), (sparse.csr_matrix(X_train), sparse.csr_matrix(X_test))]: - pc_clf = CalibratedClassifierCV( clf, method=method, cv=2, ensemble=ensemble ) @@ -81,49 +80,46 @@ def test_calibration(data, method, ensemble): pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] assert_array_almost_equal(prob_pos_pc_clf, - prob_pos_pc_clf_relabeled) + prob_pos_pc_clf_relabeled) # Check invariance against relabeling [0, 1] -> [-1, 1] pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] - assert_array_almost_equal(prob_pos_pc_clf, - prob_pos_pc_clf_relabeled) + assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled) # Check invariance against relabeling [0, 1] -> [1, 0] - pc_clf.fit(this_X_train, (y_train + 1) % 2, - sample_weight=sw_train) - prob_pos_pc_clf_relabeled = \ - pc_clf.predict_proba(this_X_test)[:, 1] + pc_clf.fit(this_X_train, (y_train + 1) % 2, sample_weight=sw_train) + prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] if method == "sigmoid": assert_array_almost_equal(prob_pos_pc_clf, - 1 - prob_pos_pc_clf_relabeled) + 1 - prob_pos_pc_clf_relabeled) else: # Isotonic calibration is not invariant against relabeling # but should improve in both cases assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss((y_test + 1) % 2, - prob_pos_pc_clf_relabeled)) + prob_pos_pc_clf_relabeled)) @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration_bad_method(data, ensemble): - # Check only "isotonic" and "sigmoid" are accepted as methods - X, y = data - clf = LinearSVC() - clf_invalid_method = CalibratedClassifierCV( - clf, method="foo", ensemble=ensemble - ) - assert_raises(ValueError, clf_invalid_method.fit, X, y) + # Check only "isotonic" and "sigmoid" are accepted as methods + X, y = data + clf = LinearSVC() + clf_invalid_method = CalibratedClassifierCV( + clf, method="foo", ensemble=ensemble + ) + assert_raises(ValueError, clf_invalid_method.fit, X, y) @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration_regressor(data, 
ensemble): - # `base-estimator` should provide either decision_function or - # predict_proba (most regressors, for instance, should fail) - X, y = data - clf_base_regressor = \ - CalibratedClassifierCV(RandomForestRegressor(), ensemble=ensemble) - assert_raises(RuntimeError, clf_base_regressor.fit, X, y) + # `base-estimator` should provide either decision_function or + # predict_proba (most regressors, for instance, should fail) + X, y = data + clf_base_regressor = \ + CalibratedClassifierCV(RandomForestRegressor(), ensemble=ensemble) + assert_raises(RuntimeError, clf_base_regressor.fit, X, y) def test_calibration_default_estimator(data): From 7fae55971b9765a3f245427fb984f39ef695a6e7 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 31 Jul 2020 18:39:25 +0200 Subject: [PATCH 34/44] og suggests --- doc/modules/calibration.rst | 13 +++--- sklearn/calibration.py | 66 ++++++++++++++++--------------- sklearn/tests/test_calibration.py | 8 ++-- 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index 494ff69b4c299..c41c9e0061317 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -110,12 +110,13 @@ The :class:`CalibratedClassifierCV` class is used to calibrate a classifier. :class:`CalibratedClassifierCV` uses a cross-validation approach to ensure unbiased data is always used to fit the calibrator. The data is split into k `(train_set, test_set)` couples (as determined by `cv`). When `ensemble=True` -(default), the classifier (`base_estimator`) is trained on the train set, and -its predictions on the test set are used to fit the calibrator (either a -sigmoid or isotonic regressor). After fitting, we end up with an ensemble of -k `(classifier, calibrator)` couples where each calibrator maps the output of -its corresponding classifier into [0, 1]. Each couple is exposed in the -`calibrated_classifiers_` attribute, where each entry is a calibrated +(default), the following procedure is repeated independently for each +cross-validation split: a clone of `base_estimator` is first trained on the +train subset. Then its predictions on the test subset are used to fit a +calibrator (either a sigmoid or isotonic regressor). This results in an +ensemble of k `(classifier, calibrator)` couples where each calibrator maps +the output of its corresponding classifier into [0, 1]. Each couple is exposed +in the `calibrated_classifiers_` attribute, where each entry is a calibrated classifier with a :term:`predict_proba` method that outputs calibrated probabilities. 
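
A quick way to see these couples, assuming the `ensemble=True` default introduced by this pull request:

    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)
    cal_clf = CalibratedClassifierCV(LinearSVC(random_state=0), cv=3)
    cal_clf.fit(X, y)
    # One (classifier, calibrator) couple per fold:
    print(len(cal_clf.calibrated_classifiers_))            # 3
    print(cal_clf.calibrated_classifiers_[0].predict_proba(X[:2]))
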
The output of :term:`predict_proba` for the main :class:`CalibratedClassifierCV` instance corresponds to the average of the diff --git a/sklearn/calibration.py b/sklearn/calibration.py index c5fc21906ec83..452903bc3ad61 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -282,10 +282,10 @@ def fit(self, X, y, sample_weight=None): pred_method = _get_prediction_method(base_estimator) n_classes = len(self.classes_) - preds = _get_predictions(pred_method, X, n_classes) + predictions = _compute_predictions(pred_method, X, n_classes) calibrated_classifier = _fit_calibrator( - base_estimator, preds, y, self.classes_, self.method, + base_estimator, predictions, y, self.classes_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) @@ -330,7 +330,7 @@ def fit(self, X, y, sample_weight=None): pre_dispatch=self.pre_dispatch) self.calibrated_classifiers_ = parallel( - delayed(_get_pred_fit_calibrator)( + delayed(_fit_classifier_calibrator_pair)( clone(base_estimator), X, y, train=train, test=test, method=self.method, classes=self.classes_, supports_sw=supports_sw, sample_weight=sample_weight) @@ -344,14 +344,14 @@ def fit(self, X, y, sample_weight=None): cv=cv, method=method_name, n_jobs=self.n_jobs, verbose=self.verbose, pre_dispatch=self.pre_dispatch ) - preds = _get_predictions(pred_method, X, n_classes) + predictions = _compute_predictions(pred_method, X, n_classes) if sample_weight is not None and supports_sw: this_estimator.fit(X, y, sample_weight) else: this_estimator.fit(X, y) calibrated_classifier = _fit_calibrator( - this_estimator, preds, y, self.classes_, self.method, + this_estimator, predictions, y, self.classes_, self.method, sample_weight ) self.calibrated_classifiers_.append(calibrated_classifier) @@ -415,9 +415,13 @@ def _more_tags(self): } -def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, - method, classes, sample_weight=None): - """Compute predictions and fit a calibrator for a given dataset split. +def _fit_classifier_calibrator_pair(estimator, X, y, train, test, supports_sw, + method, classes, sample_weight=None): + """Fit a classifier/calibration pair on a given train/test split. + + Fit the classifier on the train set, compute its predictions on the test + set and use the predictions as input to fit the calibrator along with the + test labels. Parameters ---------- @@ -450,7 +454,7 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, Returns ------- - calibrated_classifier : _CalibratedClassiferPipeline instance + calibrated_classifier : _CalibratedClassifier instance """ if sample_weight is not None and supports_sw: estimator.fit(X[train], y[train], @@ -460,11 +464,11 @@ def _get_pred_fit_calibrator(estimator, X, y, train, test, supports_sw, n_classes = len(classes) pred_method = _get_prediction_method(estimator) - preds = _get_predictions(pred_method, X[test], n_classes) + predictions = _compute_predictions(pred_method, X[test], n_classes) sw = None if sample_weight is None else sample_weight[test] calibrated_classifier = _fit_calibrator( - estimator, preds, y[test], classes, method, sample_weight=sw + estimator, predictions, y[test], classes, method, sample_weight=sw ) return calibrated_classifier @@ -495,7 +499,7 @@ def _get_prediction_method(clf): return method -def _get_predictions(pred_method, X, n_classes): +def _compute_predictions(pred_method, X, n_classes): """Return predictions for `X` and reshape binary outputs to shape (n_samples, 1). 
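
For reference, `_fit_calibrator` trains one calibrator per column of the label-binarized targets (one-vs-rest). A compact sketch with fake decision values standing in for the classifier output:

    import numpy as np
    from sklearn.isotonic import IsotonicRegression
    from sklearn.preprocessing import label_binarize

    rng = np.random.RandomState(0)
    y = np.array(['a', 'b', 'c', 'a', 'c', 'b'])
    classes = np.array(['a', 'b', 'c'])
    Y = label_binarize(y, classes=classes)   # shape (6, 3), one column per class
    scores = rng.randn(6, 3)                 # fake one-vs-rest decision values

    calibrators = [IsotonicRegression(out_of_bounds='clip')
                       .fit(scores[:, k], Y[:, k])
                   for k in range(len(classes))]
    print(len(calibrators))  # one calibrator per class
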
@@ -512,30 +516,30 @@ def _get_predictions(pred_method, X, n_classes): Returns ------- - preds : array-like, shape (X.shape[0], len(clf.classes_)) + predictions : array-like, shape (X.shape[0], len(clf.classes_)) The predictions. Note if there are 2 classes, array is of shape (X.shape[0], 1). """ - preds = pred_method(X=X) + predictions = pred_method(X=X) if hasattr(pred_method, '__name__'): method_name = pred_method.__name__ else: method_name = signature(pred_method).parameters['method'].default if method_name == 'decision_function': - if preds.ndim == 1: - preds = preds[:, np.newaxis] + if predictions.ndim == 1: + predictions = predictions[:, np.newaxis] elif method_name == 'predict_proba': if n_classes == 2: - preds = preds[:, 1:] + predictions = predictions[:, 1:] else: # pragma: no cover # this branch should be unreachable. - raise ValueError - return preds + raise ValueError(f"Invalid prediction method: {method_name}") + return predictions -def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): - """Fit calibrator(s) and return a `_CalibratedClassiferPipeline` +def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None): + """Fit calibrator(s) and return a `_CalibratedClassifier` instance. `n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted. @@ -546,9 +550,9 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): clf : estimator instance Fitted classifier. - preds : array-like, shape (n_samples, n_classes) or (n_samples, 1) when \ - binary. - Predictions for calibrating the predictions. + predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \ + when binary. + Raw predictions returned by the un-calibrated base classifier. y : array-like, shape (n_samples,) The targets. @@ -564,13 +568,13 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): Returns ------- - pipeline : _CalibratedClassiferPipeline instance + pipeline : _CalibratedClassifier instance """ Y = label_binarize(y, classes=classes) label_encoder = LabelEncoder().fit(classes) pos_class_indices = label_encoder.transform(clf.classes_) calibrators = [] - for class_idx, this_pred in zip(pos_class_indices, preds.T): + for class_idx, this_pred in zip(pos_class_indices, predictions.T): if method == 'isotonic': calibrator = IsotonicRegression(out_of_bounds='clip') elif method == 'sigmoid': @@ -581,13 +585,13 @@ def _fit_calibrator(clf, preds, y, classes, method, sample_weight=None): calibrator.fit(this_pred, Y[:, class_idx], sample_weight) calibrators.append(calibrator) - pipeline = _CalibratedClassiferPipeline( + pipeline = _CalibratedClassifier( clf, calibrators, method=method, classes=classes ) return pipeline -class _CalibratedClassiferPipeline: +class _CalibratedClassifier: """Pipeline-like chaining a fitted classifier and its fitted calibrators. 
Parameters @@ -629,7 +633,7 @@ def predict_proba(self, X): """ n_classes = len(self.classes) pred_method = _get_prediction_method(self.base_estimator) - preds = _get_predictions(pred_method, X, n_classes) + predictions = _compute_predictions(pred_method, X, n_classes) label_encoder = LabelEncoder().fit(self.classes) pos_class_indices = label_encoder.transform( @@ -638,9 +642,9 @@ def predict_proba(self, X): proba = np.zeros((X.shape[0], n_classes)) for class_idx, this_pred, calibrator in \ - zip(pos_class_indices, preds.T, self.calibrators): + zip(pos_class_indices, predictions.T, self.calibrators): if n_classes == 2: - # When binary, `preds` consists only of predictions for + # When binary, `predictions` consists only of predictions for # clf.classes_[1] but `pos_class_indices` = 0 class_idx += 1 proba[:, class_idx] = calibrator.predict(this_pred) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 86690d03bdc2e..bb749c3f5c88c 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -439,11 +439,11 @@ def test_calibration_less_classes(ensemble): enumerate(cal_clf.calibrated_classifiers_): proba = calibrated_classifier.predict_proba(X) if ensemble: - # Check 'missing' class has proba=0 + # Check that the unobserved class has proba=0 assert_array_equal(proba[:, i], np.zeros(len(y))) - # Check for all other classes proba!=0 - assert np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])) + # Check for all other classes proba>0 + assert np.all(proba[:, :i] > 0) + assert np.all(proba[:, i + 1:] > 0) else: # Check `proba` are all 1/n_classes assert np.allclose(proba, 1 / proba.shape[0]) From 900a02f61820baa274c70eba827c23672e618f52 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 31 Jul 2020 20:33:47 +0200 Subject: [PATCH 35/44] pred -> predictons --- sklearn/calibration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0e88986efc966..4d4607abbc65b 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -695,10 +695,10 @@ def _sigmoid_calibration(predictions, y, sample_weight=None): ---------- Platt, "Probabilistic Outputs for Support Vector Machines" """ - pred = column_or_1d(pred) + predictions = column_or_1d(predictions) y = column_or_1d(y) - F = pred # F follows Platt's notations + F = predictions # F follows Platt's notations # Bayesian priors (see Platt end of section 2.2) prior0 = float(np.sum(y <= 0)) From b08d1dbc1b3685cf1345089fe4e66eb3a59c8dfa Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 27 Aug 2020 11:50:11 +0200 Subject: [PATCH 36/44] suggestion --- sklearn/calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 4d4607abbc65b..aac93d8fee3e3 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -158,7 +158,7 @@ class CalibratedClassifierCV(ClassifierMixin, The class labels. calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \ - or `ensemble=False`) + or `ensemble=False`) The list of classifier and calibrator pairs. 
- When `cv="prefit"`, the fitted `base_estimator` and fitted From 1b2e3a1f1b20700a7a7f50bc90004446f4f176d2 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sat, 17 Oct 2020 19:18:23 +1300 Subject: [PATCH 37/44] suggestions, whats new --- doc/modules/calibration.rst | 2 +- doc/whats_new/v0.24.rst | 8 +++++ sklearn/calibration.py | 21 ++++++------- sklearn/tests/test_calibration.py | 50 +++++++++++++++---------------- 4 files changed, 45 insertions(+), 36 deletions(-) diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index c41c9e0061317..b145e2da18c26 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -98,7 +98,7 @@ the calibrator tries to predict :math:`p(y_i = 1 | f_i)`. The samples that are used to fit the calibrator should not be the same samples used to fit the classifier, as this would introduce bias. This is because performance of the classifier on its training data would be -better than for novel data. Using the classifier output from training data +better than for novel data. Using the classifier output of training data to fit the calibrator would thus result in a biased calibrator that maps to probabilities closer to 0 and 1 than it should. diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index f35dcc8f72ee4..4cff6dfac594b 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -58,6 +58,14 @@ Changelog sparse matrix or dataframe at the start. :pr:`17546` by :user:`Lucy Liu `. +- |Enhancement| Refactor :class:`calibration.CalibratedClassifierCV` so the + chaining of the classifier to its corresponding calibrator and fitting of + the calibrator is now separate. Also add `ensemble` parameter, which + implements calibration via an ensemble of calibrators (current method) or + just one calibrator using all the data (method implemented in + :mod:`sklearn.svm` estimators with the `probabilities=True` parameter). + :pr:`17856` by :user:`Lucy Liu `. + :mod:`sklearn.cluster` ...................... diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 14c7b46183c14..5230371131ba0 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -106,7 +106,7 @@ class CalibratedClassifierCV(ClassifierMixin, ``-1`` means using all processors. Base estimator clones are fitted in parallel across cross-validation - iterations. Therefore parallelism happens only when `cv` != "prefit". + iterations. Therefore parallelism happens only when `cv != "prefit"`. See :term:`Glossary ` for more details. @@ -126,8 +126,10 @@ class CalibratedClassifierCV(ClassifierMixin, :func:`~sklearn.model_selection.cross_val_predict`, which are then used for calibration. At prediction time, the classifier used is the `base_estimator` trained on all the data. - Note this method is implemented when `probabilities=True` for - :mod:`sklearn.svm` estimators. + Note that this method is also internally implemented in + :mod:`sklearn.svm` estimators with the `probabilities=True` parameter. + + .. versionadded:: 0.24 Attributes ---------- @@ -146,9 +148,8 @@ class CalibratedClassifierCV(ClassifierMixin, - When `cv` is not "prefit" and `ensemble=False`, the `base_estimator`, fitted on all the data, and fitted calibrator. - n_features_in_ : int - The number of features in `X`. If `cv='prefit'`, number of features - in the data used to fit `base_estimator`. + .. versionchanged:: 0.24 + Single calibrated classifier case when `ensemble=False`. 
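
As background for the sigmoid method referenced throughout: Platt scaling is roughly a logistic regression on the classifier scores. An approximate sketch (plain logistic regression; the module's `_sigmoid_calibration` instead fits the two sigmoid parameters by maximum likelihood with Bayesian priors on the targets, per Platt 2000):

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import LinearSVC

    X, y = make_classification(random_state=0)
    df = LinearSVC(random_state=0).fit(X, y).decision_function(X)
    platt = LogisticRegression().fit(df.reshape(-1, 1), y)
    print(platt.predict_proba(df[:3].reshape(-1, 1))[:, 1].round(3))
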
Examples -------- @@ -280,9 +281,9 @@ def fit(self, X, y, sample_weight=None): sample_weight = _check_sample_weight(sample_weight, X) if not supports_sw: estimator_name = type(base_estimator).__name__ - warnings.warn("Since %s does not support sample_weights, " - "sample weights will only be used for the " - "calibration itself." % estimator_name) + warnings.warn(f"Since {estimator_name} does not support " + "sample_weights, sample weights will only be" + " used for the calibration itself.") # Check that each cross-validation fold can have at least one # example per class @@ -824,7 +825,7 @@ def calibration_curve(y_true, y_prob, *, normalize=False, n_bins=5, array([0. , 0.5, 1. ]) >>> prob_pred array([0.2 , 0.525, 0.85 ]) - """ + """ y_true = column_or_1d(y_true) y_prob = column_or_1d(y_prob) check_consistent_length(y_true, y_prob) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index bb749c3f5c88c..3001b6219c9a7 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -57,48 +57,48 @@ def test_calibration(data, method, ensemble): clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train) prob_pos_clf = clf.predict_proba(X_test)[:, 1] - pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble) - assert_raises(ValueError, pc_clf.fit, X, y) + cal_clf = CalibratedClassifierCV(clf, cv=y.size + 1, ensemble=ensemble) + assert_raises(ValueError, cal_clf.fit, X, y) # Naive Bayes with calibration for this_X_train, this_X_test in [(X_train, X_test), (sparse.csr_matrix(X_train), sparse.csr_matrix(X_test))]: - pc_clf = CalibratedClassifierCV( + cal_clf = CalibratedClassifierCV( clf, method=method, cv=2, ensemble=ensemble ) # Note that this fit overwrites the fit on the entire training # set - pc_clf.fit(this_X_train, y_train, sample_weight=sw_train) - prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] + cal_clf.fit(this_X_train, y_train, sample_weight=sw_train) + prob_pos_cal_clf = cal_clf.predict_proba(this_X_test)[:, 1] # Check that brier score has improved after calibration assert (brier_score_loss(y_test, prob_pos_clf) > - brier_score_loss(y_test, prob_pos_pc_clf)) + brier_score_loss(y_test, prob_pos_cal_clf)) # Check invariance against relabeling [0, 1] -> [1, 2] - pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) - prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] - assert_array_almost_equal(prob_pos_pc_clf, - prob_pos_pc_clf_relabeled) + cal_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) + prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1] + assert_array_almost_equal(prob_pos_cal_clf, + prob_pos_cal_clf_relabeled) # Check invariance against relabeling [0, 1] -> [-1, 1] - pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) - prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] - assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled) + cal_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) + prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1] + assert_array_almost_equal(prob_pos_cal_clf, prob_pos_cal_clf_relabeled) # Check invariance against relabeling [0, 1] -> [1, 0] - pc_clf.fit(this_X_train, (y_train + 1) % 2, sample_weight=sw_train) - prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] + cal_clf.fit(this_X_train, (y_train + 1) % 2, sample_weight=sw_train) + prob_pos_cal_clf_relabeled = cal_clf.predict_proba(this_X_test)[:, 1] if method == 
"sigmoid": - assert_array_almost_equal(prob_pos_pc_clf, - 1 - prob_pos_pc_clf_relabeled) + assert_array_almost_equal(prob_pos_cal_clf, + 1 - prob_pos_cal_clf_relabeled) else: # Isotonic calibration is not invariant against relabeling # but should improve in both cases assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss((y_test + 1) % 2, - prob_pos_pc_clf_relabeled)) + prob_pos_cal_clf_relabeled)) @pytest.mark.parametrize('ensemble', [True, False]) @@ -205,7 +205,7 @@ def test_parallel_execution(data, method, ensemble): @pytest.mark.parametrize('ensemble', [True, False]) def test_calibration_multiclass(method, ensemble): # Test calibration for multiclass with classifier that implements - # only decision function.""" + # only decision function. clf = LinearSVC(random_state=7) X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42, centers=3, cluster_std=3.0) @@ -293,18 +293,18 @@ def test_calibration_prefit(): (sparse.csr_matrix(X_calib), sparse.csr_matrix(X_test))]: for method in ['isotonic', 'sigmoid']: - pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit") + cal_clf = CalibratedClassifierCV(clf, method=method, cv="prefit") for sw in [sw_calib, None]: - pc_clf.fit(this_X_calib, y_calib, sample_weight=sw) - y_prob = pc_clf.predict_proba(this_X_test) - y_pred = pc_clf.predict(this_X_test) - prob_pos_pc_clf = y_prob[:, 1] + cal_clf.fit(this_X_calib, y_calib, sample_weight=sw) + y_prob = cal_clf.predict_proba(this_X_test) + y_pred = cal_clf.predict(this_X_test) + prob_pos_cal_clf = y_prob[:, 1] assert_array_equal(y_pred, np.array([0, 1])[np.argmax(y_prob, axis=1)]) assert (brier_score_loss(y_test, prob_pos_clf) > - brier_score_loss(y_test, prob_pos_pc_clf)) + brier_score_loss(y_test, prob_pos_cal_clf)) @pytest.mark.parametrize('method', ['sigmoid', 'isotonic']) From d396c6e1ef12b3a03ff4c6a755565156d86c34e3 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sat, 17 Oct 2020 20:09:54 +1300 Subject: [PATCH 38/44] expand benefit --- doc/modules/calibration.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/modules/calibration.rst b/doc/modules/calibration.rst index b145e2da18c26..1a3a54ecf3a4d 100644 --- a/doc/modules/calibration.rst +++ b/doc/modules/calibration.rst @@ -134,6 +134,13 @@ In this case the output of :term:`predict_proba` for :class:`CalibratedClassifierCV` is the predicted probabilities obtained from the single `(classifier, calibrator)` couple. +The main advantage of `ensemble=True` is to benefit from the traditional +ensembling effect (similar to :ref:`bagging`). The resulting ensemble should +both be well calibrated and slightly more accurate than with `ensemble=False`. +The main advantage of using `ensemble=False` is computational: it reduces the +overall fit time by training only a single base classifier and calibrator +pair, decreases the final model size and increases prediction speed. + Alternatively an already fitted classifier can be calibrated by setting `cv="prefit"`. In this case, the data is not split and all of it is used to fit the regressor. 
 fit the regressor. It is up to the user

From 8c07fd41548a7cc934eb759a6e2a9f3809b94fff Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Sun, 18 Oct 2020 18:24:01 +1300
Subject: [PATCH 39/44] formatting

---
 sklearn/calibration.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 5230371131ba0..3a6289402d929 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -148,8 +148,8 @@ class CalibratedClassifierCV(ClassifierMixin,
         - When `cv` is not "prefit" and `ensemble=False`, the `base_estimator`,
           fitted on all the data, and fitted calibrator.

-    .. versionchanged:: 0.24
-        Single calibrated classifier case when `ensemble=False`.
+        .. versionchanged:: 0.24
+            Single calibrated classifier case when `ensemble=False`.

     Examples
     --------

From 577c7b3a51a9688680715c4b94f9c6034bf9e2b7 Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Wed, 21 Oct 2020 17:20:12 +1300
Subject: [PATCH 40/44] suggestion

---
 doc/whats_new/v0.24.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 4cff6dfac594b..34e98d4c977e1 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -58,11 +58,10 @@ Changelog
   sparse matrix or dataframe at the start. :pr:`17546` by
   :user:`Lucy Liu `.

-- |Enhancement| Refactor :class:`calibration.CalibratedClassifierCV` so the
-  chaining of the classifier to its corresponding calibrator and fitting of
-  the calibrator is now separate. Also add `ensemble` parameter, which
-  implements calibration via an ensemble of calibrators (current method) or
-  just one calibrator using all the data (method implemented in
+- |Enhancement| Add `ensemble` parameter to
+  :class:`calibration.CalibratedClassifierCV`, which enables implementation
+  of calibration via an ensemble of calibrators (current method) or
+  just one calibrator using all the data (similar to the built-in feature of
   :mod:`sklearn.svm` estimators with the `probabilities=True` parameter).
   :pr:`17856` by :user:`Lucy Liu `.

From bfaf3e1264206453ec41b31339169b4e3293cfb5 Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Thu, 22 Oct 2020 13:15:13 +1300
Subject: [PATCH 41/44] add user

---
 doc/whats_new/v0.24.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 34e98d4c977e1..8aaa27282a5e3 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -63,7 +63,8 @@ Changelog
   of calibration via an ensemble of calibrators (current method) or
   just one calibrator using all the data (similar to the built-in feature of
   :mod:`sklearn.svm` estimators with the `probabilities=True` parameter).
-  :pr:`17856` by :user:`Lucy Liu `.
+  :pr:`17856` by :user:`Lucy Liu ` and
+  :user:`Andrea Esuli `.

 :mod:`sklearn.cluster`
 ......................
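The changelog entry above describes the new `ensemble` parameter in prose only. For illustration, a minimal usage sketch of the two modes, assuming the public API as it stands at the end of this patch series (the dataset and hyper-parameters here are illustrative, not taken from the patches):

    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.datasets import make_classification
    from sklearn.svm import LinearSVC

    X, y = make_classification(n_samples=500, random_state=42)

    # ensemble=True: one (classifier, calibrator) pair is fit per CV fold
    # and predict_proba averages the probabilities of all pairs.
    ens = CalibratedClassifierCV(LinearSVC(random_state=0), method='sigmoid',
                                 cv=5, ensemble=True).fit(X, y)
    assert len(ens.calibrated_classifiers_) == 5

    # ensemble=False: cross-validated predictions of the base classifier are
    # used to fit a single calibrator, and the classifier is fit on all the
    # data, similar to sklearn.svm estimators with probabilities=True.
    single = CalibratedClassifierCV(LinearSVC(random_state=0), method='sigmoid',
                                    cv=5, ensemble=False).fit(X, y)
    assert len(single.calibrated_classifiers_) == 1
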
From 52c821c753d148f60a32c642c8916e8226bac10c Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Thu, 29 Oct 2020 20:57:39 +1300
Subject: [PATCH 42/44] suggestions

---
 sklearn/tests/test_calibration.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 3001b6219c9a7..92903995f69cf 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -13,6 +13,7 @@
     assert_almost_equal, assert_array_equal,
     assert_raises, ignore_warnings)
+from sklearn.utils.extmath import softmax
 from sklearn.exceptions import NotFittedError
 from sklearn.datasets import make_classification, make_blobs
 from sklearn.preprocessing import LabelEncoder
@@ -109,7 +110,8 @@ def test_calibration_bad_method(data, ensemble):
     clf_invalid_method = CalibratedClassifierCV(
         clf, method="foo", ensemble=ensemble
    )
-    assert_raises(ValueError, clf_invalid_method.fit, X, y)
+    with pytest.raises(ValueError):
+        clf_invalid_method.fit(X, y)


 @pytest.mark.parametrize('ensemble', [True, False])
@@ -119,7 +121,8 @@ def test_calibration_regressor(data, ensemble):
     X, y = data
     clf_base_regressor = \
         CalibratedClassifierCV(RandomForestRegressor(), ensemble=ensemble)
-    assert_raises(RuntimeError, clf_base_regressor.fit, X, y)
+    with pytest.raises(RuntimeError):
+        clf_base_regressor.fit(X, y)


 def test_calibration_default_estimator(data):
@@ -144,10 +147,8 @@ def test_calibration_cv_splitter(data, ensemble):
     assert calib_clf.cv.n_splits == splits

     calib_clf.fit(X, y)
-    if ensemble:
-        assert len(calib_clf.calibrated_classifiers_) == splits
-    else:
-        assert len(calib_clf.calibrated_classifiers_) == 1
+    expected_n_clf = splits if ensemble else 1
+    assert len(calib_clf.calibrated_classifiers_) == expected_n_clf


 @pytest.mark.parametrize('method', ['sigmoid', 'isotonic'])
@@ -226,15 +227,11 @@ def test_calibration_multiclass(method, ensemble):
     cal_clf.fit(X_train, y_train)
     probas = cal_clf.predict_proba(X_test)
     # Check probabilities sum to 1
-    assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test)))
+    assert_allclose(np.sum(probas, axis=1), np.ones(len(X_test)))

     # Check that log-loss of calibrated classifier is smaller than
     # log-loss obtained by naively turning OvR decision function to
     # probabilities via softmax
-    def softmax(y_pred):
-        e = np.exp(-y_pred)
-        return e / e.sum(axis=1).reshape(-1, 1)
-
     uncalibrated_log_loss = \
         log_loss(y_test, softmax(clf.decision_function(X_test)))
     calibrated_log_loss = log_loss(y_test, probas)

From deb75fc8e94a0d3b7e22f07255ffd8f6c91a6f3a Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Thu, 29 Oct 2020 18:20:46 +0100
Subject: [PATCH 43/44] More stable multiclass test with Brier score

---
 sklearn/tests/test_calibration.py | 73 +++++++++++++++++++------------
 1 file changed, 45 insertions(+), 28 deletions(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 92903995f69cf..d59b21e84fa4e 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -66,7 +66,7 @@ def test_calibration(data, method, ensemble):
                                       (sparse.csr_matrix(X_train),
                                        sparse.csr_matrix(X_test))]:
         cal_clf = CalibratedClassifierCV(
-            clf, method=method, cv=2, ensemble=ensemble
+            clf, method=method, cv=5, ensemble=ensemble
         )
         # Note that this fit overwrites the fit on the entire training
         # set
@@ -204,58 +204,75 @@ def test_parallel_execution(data, method, ensemble):
 @pytest.mark.parametrize('method', ['sigmoid', 'isotonic'])
 @pytest.mark.parametrize('ensemble', [True, False])
-def test_calibration_multiclass(method, ensemble):
+# increase the number of RNG seeds to assess the statistical stability of this
+# test:
+@pytest.mark.parametrize('seed', range(2))
+def test_calibration_multiclass(method, ensemble, seed):
+
+    def multiclass_brier(y_true, proba_pred, n_classes):
+        Y_onehot = np.eye(n_classes)[y_true]
+        return np.sum((Y_onehot - proba_pred) ** 2) / Y_onehot.shape[0]
+
     # Test calibration for multiclass with classifier that implements
     # only decision function.
     clf = LinearSVC(random_state=7)
-    X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
-                          centers=3, cluster_std=3.0)
-
-    # Use categorical labels to check that CalibratedClassifierCV supports
-    # them correctly
-    target_names = np.array(['a', 'b', 'c'])
-    y = target_names[y_idx]
-
+    X, y = make_blobs(n_samples=500, n_features=100, random_state=seed,
+                      centers=10, cluster_std=15.0)
+
+    # Use an unbalanced dataset by collapsing 8 clusters into one class
+    # to make the naive calibration based on a softmax more unlikely
+    # to work.
+    y[y > 2] = 2
+    n_classes = np.unique(y).shape[0]
     X_train, y_train = X[::2], y[::2]
     X_test, y_test = X[1::2], y[1::2]

     clf.fit(X_train, y_train)
     cal_clf = CalibratedClassifierCV(
-        clf, method=method, cv=2, ensemble=ensemble
+        clf, method=method, cv=5, ensemble=ensemble
    )
     cal_clf.fit(X_train, y_train)
     probas = cal_clf.predict_proba(X_test)
     # Check probabilities sum to 1
     assert_allclose(np.sum(probas, axis=1), np.ones(len(X_test)))

-    # Check that log-loss of calibrated classifier is smaller than
-    # log-loss obtained by naively turning OvR decision function to
-    # probabilities via softmax
-    uncalibrated_log_loss = \
-        log_loss(y_test, softmax(clf.decision_function(X_test)))
-    calibrated_log_loss = log_loss(y_test, probas)
-    assert uncalibrated_log_loss >= calibrated_log_loss
+    # Check that the dataset is not too trivial, otherwise it's hard
+    # to get interesting calibration data during the internal
+    # cross-validation loop.
+    assert 0.65 < clf.score(X_test, y_test) < 0.95
+
+    # Check that the accuracy of the calibrated model is never degraded
+    # too much compared to the original classifier.
+    assert cal_clf.score(X_test, y_test) > 0.95 * clf.score(X_test, y_test)
+
+    # Check that Brier loss of calibrated classifier is smaller than
+    # loss obtained by naively turning OvR decision function to
+    # probabilities via a softmax
+    uncalibrated_brier = \
+        multiclass_brier(y_test, softmax(clf.decision_function(X_test)),
+                         n_classes=n_classes)
+    calibrated_brier = multiclass_brier(y_test, probas,
+                                        n_classes=n_classes)
+
+    assert calibrated_brier < 1.1 * uncalibrated_brier

     # Test that calibration of a multiclass classifier decreases log-loss
     # for RandomForestClassifier
-    X, y = make_blobs(n_samples=1500, n_features=2, random_state=42,
-                      cluster_std=3.0)
-    X_train, y_train = X[::2], y[::2]
-    X_test, y_test = X[1::2], y[1::2]
-
-    clf = RandomForestClassifier(n_estimators=10, random_state=42)
+    clf = RandomForestClassifier(n_estimators=30, random_state=42)
     clf.fit(X_train, y_train)
     clf_probs = clf.predict_proba(X_test)
-    loss = log_loss(y_test, clf_probs)
+    uncalibrated_brier = multiclass_brier(y_test, clf_probs,
+                                          n_classes=n_classes)

     cal_clf = CalibratedClassifierCV(
-        clf, method=method, cv=3, ensemble=ensemble
+        clf, method=method, cv=5, ensemble=ensemble
    )
     cal_clf.fit(X_train, y_train)
     cal_clf_probs = cal_clf.predict_proba(X_test)
-    cal_loss = log_loss(y_test, cal_clf_probs)
-    assert loss > cal_loss
+    calibrated_brier = multiclass_brier(y_test, cal_clf_probs,
+                                        n_classes=n_classes)
+    assert calibrated_brier < 1.1 * uncalibrated_brier


 def test_calibration_prefit():

From aff6bb88be120775499d3b749f3e894c065a0df3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel
Date: Thu, 29 Oct 2020 18:23:30 +0100
Subject: [PATCH 44/44] Unused import

---
 sklearn/tests/test_calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index d59b21e84fa4e..3d2931d0c65f9 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -25,7 +25,7 @@
 from sklearn.feature_extraction import DictVectorizer
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
-from sklearn.metrics import brier_score_loss, log_loss
+from sklearn.metrics import brier_score_loss
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.calibration import _sigmoid_calibration, _SigmoidCalibration
 from sklearn.calibration import calibration_curve
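
For reference, the `multiclass_brier` helper introduced in PATCH 43 generalizes the binary Brier score by one-hot encoding the integer labels. A self-contained sketch of the same computation, with two hand-checked corner cases (the example arrays are illustrative, not taken from the patches):

    import numpy as np

    def multiclass_brier(y_true, proba_pred, n_classes):
        # Row i of Y_onehot is the standard basis vector for class y_true[i].
        Y_onehot = np.eye(n_classes)[y_true]
        # Mean over samples of the squared Euclidean distance between the
        # one-hot target and the predicted probability vector.
        return np.sum((Y_onehot - proba_pred) ** 2) / Y_onehot.shape[0]

    # A perfect prediction scores 0; a maximally wrong hard prediction
    # scores 2, which motivates the relative `< 1.1 *` comparisons above.
    assert multiclass_brier(np.array([0, 1]),
                            np.array([[1., 0.], [0., 1.]]), n_classes=2) == 0.0
    assert multiclass_brier(np.array([0, 1]),
                            np.array([[0., 1.], [1., 0.]]), n_classes=2) == 2.0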