From f128648f79dcbabb25e3709fd876918b22275b5a Mon Sep 17 00:00:00 2001 From: virchan Date: Thu, 7 Dec 2023 18:02:24 -0800 Subject: [PATCH 01/17] Added link for plot_adaboost_multiclass example --- examples/ensemble/plot_adaboost_multiclass.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 35b0d1bb86470..5343d93778696 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -17,6 +17,9 @@ be selected. This ensures that subsequent iterations of the algorithm focus on the difficult-to-classify samples. +For more examples of usage, see +:ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. + .. topic:: References: .. [1] :doi:`J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class adaboost." From 6fb31eadfba39a029d26bc042c69520b8fc58c63 Mon Sep 17 00:00:00 2001 From: virchan Date: Sun, 10 Dec 2023 10:07:14 -0800 Subject: [PATCH 02/17] Moved the example link from the example itself back to the doc string in _weight_boosting.py. --- examples/ensemble/plot_adaboost_multiclass.py | 3 --- sklearn/ensemble/_weight_boosting.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index 5343d93778696..35b0d1bb86470 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -17,9 +17,6 @@ be selected. This ensures that subsequent iterations of the algorithm focus on the difficult-to-classify samples. -For more examples of usage, see -:ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. - .. topic:: References: .. [1] :doi:`J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class adaboost." diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index af731892880ee..313524097d088 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -351,6 +351,9 @@ class AdaBoostClassifier( Read more in the :ref:`User Guide `. + For more examples of usage, see + :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. + .. versionadded:: 0.14 Parameters From a2c74683323e3bf85c836f6503a4211f30b84fac Mon Sep 17 00:00:00 2001 From: virchan Date: Fri, 26 Jan 2024 11:54:03 -0800 Subject: [PATCH 03/17] Reworded the example reference of AdaBoost in the `ensemble/_weight_boosting.py` file, moving it below the `Examples` section for improved organization. Included an AdaBoost example reference within the DecisionTree class in the `tree/_classes.py` file. --- sklearn/ensemble/_weight_boosting.py | 6 +++--- sklearn/tree/_classes.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 313524097d088..406acfff4fa6f 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -351,9 +351,6 @@ class AdaBoostClassifier( Read more in the :ref:`User Guide `. - For more examples of usage, see - :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. - .. versionadded:: 0.14 Parameters @@ -481,6 +478,9 @@ class AdaBoostClassifier( array([1]) >>> clf.score(X, y) 0.96... + + For an example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, + please refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`.
""" # TODO(1.6): Modify _parameter_constraints for "algorithm" to only check diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 00d61f184731d..3972dbe909906 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -934,6 +934,9 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): ... array([ 1. , 0.93..., 0.86..., 0.93..., 0.93..., 0.93..., 0.93..., 1. , 0.93..., 1. ]) + + For an example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, + please refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. """ _parameter_constraints: dict = { From eb50b06819ba762d0b48135ecf3c0a31f8aea0d9 Mon Sep 17 00:00:00 2001 From: virchan Date: Tue, 27 Feb 2024 16:18:32 -0800 Subject: [PATCH 04/17] - Added the Multi-class AdaBoosted Decision Trees example to the Decision Trees user guide. - Modified the doc-string wording in the `AdaBoostClassifier` class referencing to the aforementioned example. --- doc/modules/tree.rst | 1 + sklearn/ensemble/_weight_boosting.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index b54b913573a34..e838216ac76c5 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -222,6 +222,7 @@ of external libraries and is more compact: * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` .. _tree_regression: diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 406acfff4fa6f..7e1e9ee8fb906 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -479,7 +479,7 @@ class AdaBoostClassifier( >>> clf.score(X, y) 0.96... - For an example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, + For a detailed example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, please refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. """ From 46318a7e443b5ba9810f51cfd4cfacb2b148707e Mon Sep 17 00:00:00 2001 From: virchan Date: Fri, 1 Mar 2024 13:39:28 -0800 Subject: [PATCH 05/17] Reformatted doc-strings to meet the ruff requirement --- sklearn/ensemble/_weight_boosting.py | 5 +++-- sklearn/tree/_classes.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 7e1e9ee8fb906..0461a397983be 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -479,8 +479,9 @@ class AdaBoostClassifier( >>> clf.score(X, y) 0.96... - For a detailed example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, - please refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. + For a detailed example of using AdaBoost to fit a sequence of DecisionTrees + as weaklearners, please refer to + :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. """ # TODO(1.6): Modify _parameter_constraints for "algorithm" to only check diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 0205fdb117c03..020779404ca2d 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -935,8 +935,9 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): array([ 1. , 0.93..., 0.86..., 0.93..., 0.93..., 0.93..., 0.93..., 1. , 0.93..., 1. 
]) - For an example of using AdaBoost to fit a sequence of DecisionTrees as weak learners, - please refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. + For an example of using AdaBoost to fit a sequence of + DecisionTrees as weak learners, please refer to + :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. """ _parameter_constraints: dict = { From 752a8e83bc25c25c48a8a57cd112b925e255ca1e Mon Sep 17 00:00:00 2001 From: virchan Date: Fri, 1 Mar 2024 15:36:34 -0800 Subject: [PATCH 06/17] Empty commit for test re-run. From 6d866d4808d3e152f1d5b009e6f12a571c264bc7 Mon Sep 17 00:00:00 2001 From: virchan Date: Wed, 6 Mar 2024 11:11:08 -0800 Subject: [PATCH 07/17] Removed example links from two files --- doc/modules/tree.rst | 1 - sklearn/tree/_classes.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index e838216ac76c5..b54b913573a34 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -222,7 +222,6 @@ of external libraries and is more compact: * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` * :ref:`sphx_glr_auto_examples_tree_plot_unveil_tree_structure.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` .. _tree_regression: diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 020779404ca2d..7e2419a77dcee 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -935,9 +935,6 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): array([ 1. , 0.93..., 0.86..., 0.93..., 0.93..., 0.93..., 0.93..., 1. , 0.93..., 1. ]) - For an example of using AdaBoost to fit a sequence of - DecisionTrees as weak learners, please refer to - :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. """ _parameter_constraints: dict = { From 23e6ef1a7ad8ef92376b4857bc59831d3a7bd205 Mon Sep 17 00:00:00 2001 From: virchan Date: Wed, 6 Mar 2024 11:37:36 -0800 Subject: [PATCH 08/17] Empty commit for checks re-run From 03aa44ab56d11e7a7b84dcc2cadd677d1366f1aa Mon Sep 17 00:00:00 2001 From: virchan Date: Wed, 6 Mar 2024 12:05:47 -0800 Subject: [PATCH 09/17] Removed an empty line for checks re-run. --- sklearn/tree/_classes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index 7e2419a77dcee..9f99d831a0990 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -934,7 +934,6 @@ class DecisionTreeClassifier(ClassifierMixin, BaseDecisionTree): ... array([ 1. , 0.93..., 0.86..., 0.93..., 0.93..., 0.93..., 0.93..., 1. , 0.93..., 1. ]) - """ _parameter_constraints: dict = { From 88d45008d8c3daeee5fbdd159082fddee661e4d7 Mon Sep 17 00:00:00 2001 From: virchan Date: Mon, 6 May 2024 10:57:29 -0700 Subject: [PATCH 10/17] Created the `sklearn/calibration_temperature.py` to contain all work related to temperature scaling. 
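Temperature scaling recalibrates a multi-class classifier by learning a single scalar T > 0 and replacing softmax(z) with softmax(z / T), where z are the classifier's logits; dividing by a positive constant preserves the argmax, so only the confidence of the predictions changes, never the predicted class. T is chosen by minimizing the negative log-likelihood on held-out calibration data. A minimal self-contained sketch of the idea, with illustrative names and independent of the implementation in this file:

    import numpy as np
    from scipy.optimize import minimize_scalar

    def fit_temperature(logits, y):
        # Negative log-likelihood of softmax(logits / t); y holds integer class indices.
        def nll(t):
            z = logits / t
            z -= z.max(axis=1, keepdims=True)  # subtract row max for numerical stability
            log_p = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
            return -log_p[np.arange(len(y)), y].sum()
        # A bounded scalar search suffices, since there is a single parameter.
        return minimize_scalar(nll, bounds=(0.05, 20.0), method="bounded").x

A well-calibrated model yields T close to 1, while the overconfident models common in practice yield T > 1, which softens the predicted probabilities.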
--- sklearn/calibration_temperature.py | 1558 ++++++++++++++++++++++++++++ 1 file changed, 1558 insertions(+) create mode 100644 sklearn/calibration_temperature.py diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py new file mode 100644 index 0000000000000..12f287482488d --- /dev/null +++ b/sklearn/calibration_temperature.py @@ -0,0 +1,1558 @@ +"""Calibration of predicted probabilities.""" + +# Author: Alexandre Gramfort +# Balazs Kegl +# Jan Hendrik Metzen +# Mathieu Blondel +# +# License: BSD 3 clause + +import warnings +from inspect import signature +from math import log +from numbers import Integral, Real + +import numpy as np +from scipy.optimize import minimize +from scipy.special import expit, softmax + +from sklearn.utils import Bunch + +from ._loss import HalfBinomialLoss +from .base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + _fit_context, + clone, +) +from .isotonic import IsotonicRegression +from .model_selection import check_cv, cross_val_predict +from .preprocessing import LabelEncoder, label_binarize +from .svm import LinearSVC +from .utils import ( + _safe_indexing, + column_or_1d, + indexable, +) +from .utils._param_validation import ( + HasMethods, + Interval, + StrOptions, + validate_params, +) +from .utils._plotting import _BinaryClassifierCurveDisplayMixin +from .utils._response import _get_response_values, _process_predict_proba +from .utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + _routing_enabled, + process_routing, +) +from .utils.multiclass import check_classification_targets +from .utils.parallel import Parallel, delayed +from .utils.validation import ( + _check_method_params, + _check_pos_label_consistency, + _check_response_method, + _check_sample_weight, + _num_samples, + check_consistent_length, + check_is_fitted, +) + + +class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): + """Probability calibration with isotonic regression or logistic regression. + + This class uses cross-validation to both estimate the parameters of a + classifier and subsequently calibrate a classifier. With default + `ensemble=True`, for each cv split it + fits a copy of the base estimator to the training subset, and calibrates it + using the testing subset. For prediction, predicted probabilities are + averaged across these individual calibrated classifiers. When + `ensemble=False`, cross-validation is used to obtain unbiased predictions, + via :func:`~sklearn.model_selection.cross_val_predict`, which are then + used for calibration. For prediction, the base estimator, trained using all + the data, is used. This is the prediction method implemented when + `probability=True` for :class:`~sklearn.svm.SVC` and :class:`~sklearn.svm.NuSVC` + estimators (see :ref:`User Guide ` for details). + + Already fitted classifiers can be calibrated via the parameter + `cv="prefit"`. In this case, no cross-validation is used and all provided + data is used for calibration. The user has to take care manually that data + for model fitting and calibration are disjoint. + + The calibration is based on the :term:`decision_function` method of the + `estimator` if it exists, else on :term:`predict_proba`. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + estimator : estimator instance, default=None + The classifier whose output needs to be calibrated to provide more + accurate `predict_proba` outputs.
The default classifier is + a :class:`~sklearn.svm.LinearSVC`. + + .. versionadded:: 1.2 + + method : {'sigmoid', 'isotonic'}, default='sigmoid' + The method to use for calibration. Can be 'sigmoid' which + corresponds to Platt's method (i.e. a logistic regression model) or + 'isotonic' which is a non-parametric approach. It is not advised to + use isotonic calibration with too few calibration samples + ``(<<1000)`` since it tends to overfit. + + cv : int, cross-validation generator, iterable or "prefit", \ + default=None + Determines the cross-validation splitting strategy. + Possible inputs for cv are: + + - None, to use the default 5-fold cross-validation, + - integer, to specify the number of folds. + - :term:`CV splitter`, + - An iterable yielding (train, test) splits as arrays of indices. + + For integer/None inputs, if ``y`` is binary or multiclass, + :class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is + neither binary nor multiclass, :class:`~sklearn.model_selection.KFold` + is used. + + Refer to the :ref:`User Guide ` for the various + cross-validation strategies that can be used here. + + If "prefit" is passed, it is assumed that `estimator` has been + fitted already and all data is used for calibration. + + .. versionchanged:: 0.22 + ``cv`` default value if None changed from 3-fold to 5-fold. + + n_jobs : int, default=None + Number of jobs to run in parallel. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. + + Base estimator clones are fitted in parallel across cross-validation + iterations. Therefore parallelism happens only when `cv != "prefit"`. + + See :term:`Glossary ` for more details. + + .. versionadded:: 0.24 + + ensemble : bool, default=True + Determines how the calibrator is fitted when `cv` is not `'prefit'`. + Ignored if `cv='prefit'`. + + If `True`, the `estimator` is fitted using training data, and + calibrated using testing data, for each `cv` fold. The final estimator + is an ensemble of `n_cv` fitted classifier and calibrator pairs, where + `n_cv` is the number of cross-validation folds. The output is the + average predicted probabilities of all pairs. + + If `False`, `cv` is used to compute unbiased predictions, via + :func:`~sklearn.model_selection.cross_val_predict`, which are then + used for calibration. At prediction time, the classifier used is the + `estimator` trained on all the data. + Note that this method is also internally implemented in + :mod:`sklearn.svm` estimators with the `probability=True` parameter. + + .. versionadded:: 0.24 + + Attributes + ---------- + classes_ : ndarray of shape (n_classes,) + The class labels. + + n_features_in_ : int + Number of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. versionadded:: 0.24 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Only defined if the + underlying estimator exposes such an attribute when fit. + + .. versionadded:: 1.0 + + calibrated_classifiers_ : list (len() equal to cv or 1 if `cv="prefit"` \ + or `ensemble=False`) + The list of classifier and calibrator pairs. + + - When `cv="prefit"`, the fitted `estimator` and fitted + calibrator. + - When `cv` is not "prefit" and `ensemble=True`, `n_cv` fitted + `estimator` and calibrator pairs. `n_cv` is the number of + cross-validation folds.
+ - When `cv` is not "prefit" and `ensemble=False`, the `estimator`, + fitted on all the data, and fitted calibrator. + + .. versionchanged:: 0.24 + Single calibrated classifier case when `ensemble=False`. + + See Also + -------- + calibration_curve : Compute true and predicted probabilities + for a calibration curve. + + References + ---------- + .. [1] Obtaining calibrated probability estimates from decision trees + and naive Bayesian classifiers, B. Zadrozny & C. Elkan, ICML 2001 + + .. [2] Transforming Classifier Scores into Accurate Multiclass + Probability Estimates, B. Zadrozny & C. Elkan, (KDD 2002) + + .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods, J. Platt, (1999) + + .. [4] Predicting Good Probabilities with Supervised Learning, + A. Niculescu-Mizil & R. Caruana, ICML 2005 + + Examples + -------- + >>> from sklearn.datasets import make_classification + >>> from sklearn.naive_bayes import GaussianNB + >>> from sklearn.calibration import CalibratedClassifierCV + >>> X, y = make_classification(n_samples=100, n_features=2, + ... n_redundant=0, random_state=42) + >>> base_clf = GaussianNB() + >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv=3) + >>> calibrated_clf.fit(X, y) + CalibratedClassifierCV(...) + >>> len(calibrated_clf.calibrated_classifiers_) + 3 + >>> calibrated_clf.predict_proba(X)[:5, :] + array([[0.110..., 0.889...], + [0.072..., 0.927...], + [0.928..., 0.071...], + [0.928..., 0.071...], + [0.071..., 0.928...]]) + >>> from sklearn.model_selection import train_test_split + >>> X, y = make_classification(n_samples=100, n_features=2, + ... n_redundant=0, random_state=42) + >>> X_train, X_calib, y_train, y_calib = train_test_split( + ... X, y, random_state=42 + ... ) + >>> base_clf = GaussianNB() + >>> base_clf.fit(X_train, y_train) + GaussianNB() + >>> calibrated_clf = CalibratedClassifierCV(base_clf, cv="prefit") + >>> calibrated_clf.fit(X_calib, y_calib) + CalibratedClassifierCV(...) + >>> len(calibrated_clf.calibrated_classifiers_) + 1 + >>> calibrated_clf.predict_proba([[-0.5, 0.5]]) + array([[0.936..., 0.063...]]) + """ + + _parameter_constraints: dict = { + "estimator": [ + HasMethods(["fit", "predict_proba"]), + HasMethods(["fit", "decision_function"]), + None, + ], + "method": [StrOptions({"isotonic", "sigmoid"})], + "cv": ["cv_object", StrOptions({"prefit"})], + "n_jobs": [Integral, None], + "ensemble": ["boolean"], + } + + def __init__( + self, + estimator=None, + *, + method="sigmoid", + cv=None, + n_jobs=None, + ensemble=True, + ): + self.estimator = estimator + self.method = method + self.cv = cv + self.n_jobs = n_jobs + self.ensemble = ensemble + + def _get_estimator(self): + """Resolve which estimator to return (default is LinearSVC)""" + if self.estimator is None: + # we want all classifiers that don't expose a random_state + # to be deterministic (and we don't want to expose this one). + estimator = LinearSVC(random_state=0, dual="auto") + if _routing_enabled(): + estimator.set_fit_request(sample_weight=True) + else: + estimator = self.estimator + + return estimator + + @_fit_context( + # CalibratedClassifierCV.estimator is not validated yet + prefer_skip_nested_validation=False + ) + def fit(self, X, y, sample_weight=None, **fit_params): + """Fit the calibrated model. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) + Target values. 
+ + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + + **fit_params : dict + Parameters to pass to the `fit` method of the underlying + classifier. + + Returns + ------- + self : object + Returns an instance of self. + """ + check_classification_targets(y) + X, y = indexable(X, y) + if sample_weight is not None: + sample_weight = _check_sample_weight(sample_weight, X) + + estimator = self._get_estimator() + + self.calibrated_classifiers_ = [] + if self.cv == "prefit": + # `classes_` should be consistent with that of estimator + check_is_fitted(self.estimator, attributes=["classes_"]) + self.classes_ = self.estimator.classes_ + + predictions, _ = _get_response_values( + estimator, + X, + response_method=["decision_function", "predict_proba"], + ) + if predictions.ndim == 1: + # Reshape binary output from `(n_samples,)` to `(n_samples, 1)` + predictions = predictions.reshape(-1, 1) + + calibrated_classifier = _fit_calibrator( + estimator, + predictions, + y, + self.classes_, + self.method, + sample_weight, + ) + self.calibrated_classifiers_.append(calibrated_classifier) + else: + # Set `classes_` using all `y` + label_encoder_ = LabelEncoder().fit(y) + self.classes_ = label_encoder_.classes_ + + if _routing_enabled(): + routed_params = process_routing( + self, + "fit", + sample_weight=sample_weight, + **fit_params, + ) + else: + # sample_weight checks + fit_parameters = signature(estimator.fit).parameters + supports_sw = "sample_weight" in fit_parameters + if sample_weight is not None and not supports_sw: + estimator_name = type(estimator).__name__ + warnings.warn( + f"Since {estimator_name} does not appear to accept" + " sample_weight, sample weights will only be used for the" + " calibration itself. This can be caused by a limitation of" + " the current scikit-learn API. See the following issue for" + " more details:" + " https://github.com/scikit-learn/scikit-learn/issues/21134." + " Be warned that the result of the calibration is likely to be" + " incorrect." + ) + routed_params = Bunch() + routed_params.splitter = Bunch(split={}) # no routing for splitter + routed_params.estimator = Bunch(fit=fit_params) + if sample_weight is not None and supports_sw: + routed_params.estimator.fit["sample_weight"] = sample_weight + + # Check that each cross-validation fold can have at least one + # example per class + if isinstance(self.cv, int): + n_folds = self.cv + elif hasattr(self.cv, "n_splits"): + n_folds = self.cv.n_splits + else: + n_folds = None + if n_folds and np.any( + [np.sum(y == class_) < n_folds for class_ in self.classes_] + ): + raise ValueError( + f"Requesting {n_folds}-fold " + "cross-validation but provided less than " + f"{n_folds} examples for at least one class." 
+ ) + cv = check_cv(self.cv, y, classifier=True) + + if self.ensemble: + parallel = Parallel(n_jobs=self.n_jobs) + self.calibrated_classifiers_ = parallel( + delayed(_fit_classifier_calibrator_pair)( + clone(estimator), + X, + y, + train=train, + test=test, + method=self.method, + classes=self.classes_, + sample_weight=sample_weight, + fit_params=routed_params.estimator.fit, + ) + for train, test in cv.split(X, y, **routed_params.splitter.split) + ) + else: + this_estimator = clone(estimator) + method_name = _check_response_method( + this_estimator, + ["decision_function", "predict_proba"], + ).__name__ + predictions = cross_val_predict( + estimator=this_estimator, + X=X, + y=y, + cv=cv, + method=method_name, + n_jobs=self.n_jobs, + params=routed_params.estimator.fit, + ) + if len(self.classes_) == 2: + # Ensure shape (n_samples, 1) in the binary case + if method_name == "predict_proba": + # Select the probability column of the positive class + predictions = _process_predict_proba( + y_pred=predictions, + target_type="binary", + classes=self.classes_, + pos_label=self.classes_[1], + ) + predictions = predictions.reshape(-1, 1) + + this_estimator.fit(X, y, **routed_params.estimator.fit) + # Note: Here we don't pass on fit_params because the supported + # calibrators don't support fit_params anyway + calibrated_classifier = _fit_calibrator( + this_estimator, + predictions, + y, + self.classes_, + self.method, + sample_weight, + ) + self.calibrated_classifiers_.append(calibrated_classifier) + + first_clf = self.calibrated_classifiers_[0].estimator + if hasattr(first_clf, "n_features_in_"): + self.n_features_in_ = first_clf.n_features_in_ + if hasattr(first_clf, "feature_names_in_"): + self.feature_names_in_ = first_clf.feature_names_in_ + return self + + def predict_proba(self, X): + """Calibrated probabilities of classification. + + This function returns calibrated probabilities of classification + according to each class on an array of test vectors X. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The samples, as accepted by `estimator.predict_proba`. + + Returns + ------- + C : ndarray of shape (n_samples, n_classes) + The predicted probabilities. + """ + check_is_fitted(self) + # Compute the arithmetic mean of the predictions of the calibrated + # classifiers + mean_proba = np.zeros((_num_samples(X), len(self.classes_))) + for calibrated_classifier in self.calibrated_classifiers_: + proba = calibrated_classifier.predict_proba(X) + mean_proba += proba + + mean_proba /= len(self.calibrated_classifiers_) + + return mean_proba + + def predict(self, X): + """Predict the target of new samples. + + The predicted class is the class that has the highest probability, + and can thus be different from the prediction of the uncalibrated classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The samples, as accepted by `estimator.predict`. + + Returns + ------- + C : ndarray of shape (n_samples,) + The predicted class. + """ + check_is_fitted(self) + return self.classes_[np.argmax(self.predict_proba(X), axis=1)] + + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information.
+ """ + router = ( + MetadataRouter(owner=self.__class__.__name__) + .add_self_request(self) + .add( + estimator=self._get_estimator(), + method_mapping=MethodMapping().add(callee="fit", caller="fit"), + ) + .add( + splitter=self.cv, + method_mapping=MethodMapping().add(callee="split", caller="fit"), + ) + ) + return router + + def _more_tags(self): + return { + "_xfail_checks": { + "check_sample_weights_invariance": ( + "Due to the cross-validation and sample ordering, removing a sample" + " is not strictly equal to putting is weight to zero. Specific unit" + " tests are added for CalibratedClassifierCV specifically." + ), + } + } + + +def _fit_classifier_calibrator_pair( + estimator, + X, + y, + train, + test, + method, + classes, + sample_weight=None, + fit_params=None, +): + """Fit a classifier/calibration pair on a given train/test split. + + Fit the classifier on the train set, compute its predictions on the test + set and use the predictions as input to fit the calibrator along with the + test labels. + + Parameters + ---------- + estimator : estimator instance + Cloned base estimator. + + X : array-like, shape (n_samples, n_features) + Sample data. + + y : array-like, shape (n_samples,) + Targets. + + train : ndarray, shape (n_train_indices,) + Indices of the training subset. + + test : ndarray, shape (n_test_indices,) + Indices of the testing subset. + + method : {'sigmoid', 'isotonic'} + Method to use for calibration. + + classes : ndarray, shape (n_classes,) + The target classes. + + sample_weight : array-like, default=None + Sample weights for `X`. + + fit_params : dict, default=None + Parameters to pass to the `fit` method of the underlying + classifier. + + Returns + ------- + calibrated_classifier : _CalibratedClassifier instance + """ + fit_params_train = _check_method_params(X, params=fit_params, indices=train) + X_train, y_train = _safe_indexing(X, train), _safe_indexing(y, train) + X_test, y_test = _safe_indexing(X, test), _safe_indexing(y, test) + + estimator.fit(X_train, y_train, **fit_params_train) + + predictions, _ = _get_response_values( + estimator, + X_test, + response_method=["decision_function", "predict_proba"], + ) + if predictions.ndim == 1: + # Reshape binary output from `(n_samples,)` to `(n_samples, 1)` + predictions = predictions.reshape(-1, 1) + + sw_test = None if sample_weight is None else _safe_indexing(sample_weight, test) + calibrated_classifier = _fit_calibrator( + estimator, predictions, y_test, classes, method, sample_weight=sw_test + ) + return calibrated_classifier + + +def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None): + """Fit calibrator(s) and return a `_CalibratedClassifier` + instance. + + `n_classes` (i.e. `len(clf.classes_)`) calibrators are fitted. + However, if `n_classes` equals 2, one calibrator is fitted. + + Parameters + ---------- + clf : estimator instance + Fitted classifier. + + predictions : array-like, shape (n_samples, n_classes) or (n_samples, 1) \ + when binary. + Raw predictions returned by the un-calibrated base classifier. + + y : array-like, shape (n_samples,) + The targets. + + classes : ndarray, shape (n_classes,) + All the prediction classes. + + method : {'sigmoid', 'isotonic'} + The method to use for calibration. + + sample_weight : ndarray, shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. 
+ + Returns + ------- + pipeline : _CalibratedClassifier instance + """ + Y = label_binarize(y, classes=classes) + label_encoder = LabelEncoder().fit(classes) + pos_class_indices = label_encoder.transform(clf.classes_) + calibrators = [] + + if (method == 'isotonic') or (method == 'sigmoid'): + for class_idx, this_pred in zip(pos_class_indices, predictions.T): + if method == "isotonic": + calibrator = IsotonicRegression(out_of_bounds="clip") + else: # "sigmoid" + calibrator = _SigmoidCalibration() + calibrator.fit(this_pred, Y[:, class_idx], sample_weight) + calibrators.append(calibrator) + + elif method == 'Temperature_scaling': + calibrator = _TemperatureScaling() + calibrator.fit(predictions, Y, sample_weight) + + pipeline = _CalibratedClassifier(clf, calibrators, method=method, classes=classes) + return pipeline + + +class _CalibratedClassifier: + """Pipeline-like chaining a fitted classifier and its fitted calibrators. + + Parameters + ---------- + estimator : estimator instance + Fitted classifier. + + calibrators : list of fitted estimator instances + List of fitted calibrators (either 'IsotonicRegression' or + '_SigmoidCalibration'). The number of calibrators equals the number of + classes. However, if there are 2 classes, the list contains only one + fitted calibrator. + + classes : array-like of shape (n_classes,) + All the prediction classes. + + method : {'sigmoid', 'isotonic'}, default='sigmoid' + The method to use for calibration. Can be 'sigmoid' which + corresponds to Platt's method or 'isotonic' which is a + non-parametric approach based on isotonic regression. + """ + + def __init__(self, estimator, calibrators, *, classes, method="sigmoid"): + self.estimator = estimator + self.calibrators = calibrators + self.classes = classes + self.method = method + + def predict_proba(self, X): + """Calculate calibrated probabilities. + + Calculates classification calibrated probabilities + for each class, in a one-vs-all manner, for `X`. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + The sample data. + + Returns + ------- + proba : array, shape (n_samples, n_classes) + The predicted probabilities. Can be exact zeros. + """ + predictions, _ = _get_response_values( + self.estimator, + X, + response_method=["decision_function", "predict_proba"], + ) + if predictions.ndim == 1: + # Reshape binary output from `(n_samples,)` to `(n_samples, 1)` + predictions = predictions.reshape(-1, 1) + + n_classes = len(self.classes) + + label_encoder = LabelEncoder().fit(self.classes) + pos_class_indices = label_encoder.transform(self.estimator.classes_) + + proba = np.zeros((_num_samples(X), n_classes)) + + # Sigmoid and Isotonic methods + if (self.method == 'sigmoid') or (self.method == 'isotonic'): + + for class_idx, this_pred, calibrator in zip( + pos_class_indices, predictions.T, self.calibrators + ): + if n_classes == 2: + # When binary, `predictions` consists only of predictions for + # clf.classes_[1] but `pos_class_indices` = 0 + class_idx += 1 + proba[:, class_idx] = calibrator.predict(this_pred) + + # Normalize the probabilities + if n_classes == 2: + proba[:, 0] = 1.0 - proba[:, 1] + else: + denominator = np.sum(proba, axis=1)[:, np.newaxis] + # In the edge case where for each class calibrator returns a null + # probability for a given sample, use the uniform distribution + # instead. 
+ uniform_proba = np.full_like(proba, 1 / n_classes) + proba = np.divide( + proba, denominator, out=uniform_proba, where=denominator != 0 + ) + + # Temperature Scaling method + elif self.method == 'temperature_scaling': + + assert len(self.calibrators) == 1, 'Temperature scaling should consist of one calibrator.' + + proba = self.calibrators[0].predict(predictions) + + # Deal with cases where the predicted probability minimally exceeds 1.0 + proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0 + + return proba + + +# The max_abs_prediction_threshold was approximated using +# logit(np.finfo(np.float64).eps) which is about -36 +def _sigmoid_calibration( + predictions, y, sample_weight=None, max_abs_prediction_threshold=30 +): + """Probability Calibration with sigmoid method (Platt 2000) + + Parameters + ---------- + predictions : ndarray of shape (n_samples,) + The decision function or predict proba for the samples. + + y : ndarray of shape (n_samples,) + The targets. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + + Returns + ------- + a : float + The slope. + + b : float + The intercept. + + References + ---------- + Platt, "Probabilistic Outputs for Support Vector Machines" + """ + predictions = column_or_1d(predictions) + y = column_or_1d(y) + + F = predictions # F follows Platt's notations + + scale_constant = 1.0 + max_prediction = np.max(np.abs(F)) + + # If the predictions have large values we scale them in order to bring + # them within a suitable range. This has no effect on the final + # (prediction) result because linear models like Logistic Regression + # without a penalty are invariant to multiplying the features by a + # constant. + if max_prediction >= max_abs_prediction_threshold: + scale_constant = max_prediction + # We rescale the features in a copy: inplace rescaling could confuse + # the caller and make the code harder to reason about. + F = F / scale_constant + + # Bayesian priors (see Platt end of section 2.2): + # It corresponds to the number of samples, taking into account the + # `sample_weight`. + mask_negative_samples = y <= 0 + if sample_weight is not None: + prior0 = (sample_weight[mask_negative_samples]).sum() + prior1 = (sample_weight[~mask_negative_samples]).sum() + else: + prior0 = float(np.sum(mask_negative_samples)) + prior1 = y.shape[0] - prior0 + T = np.zeros_like(y, dtype=predictions.dtype) + T[y > 0] = (prior1 + 1.0) / (prior1 + 2.0) + T[y <= 0] = 1.0 / (prior0 + 2.0) + + bin_loss = HalfBinomialLoss() + + def loss_grad(AB): + # .astype below is needed to ensure y_true and raw_prediction have the + # same dtype. With result = np.float64(0) * np.array([1, 2], dtype=np.float32) + # - in Numpy 2, result.dtype is float64 + # - in Numpy<2, result.dtype is float32 + raw_prediction = -(AB[0] * F + AB[1]).astype(dtype=predictions.dtype) + l, g = bin_loss.loss_gradient( + y_true=T, + raw_prediction=raw_prediction, + sample_weight=sample_weight, + ) + loss = l.sum() + # TODO: Remove casting to np.float64 when minimum supported SciPy is 1.11.2 + # With SciPy >= 1.11.2, the LBFGS implementation will cast to float64 + # https://github.com/scipy/scipy/pull/18825.
+ # Here we cast to float64 to support SciPy < 1.11.2 + grad = np.asarray([-g @ F, -g.sum()], dtype=np.float64) + return loss, grad + + AB0 = np.array([0.0, log((prior0 + 1.0) / (prior1 + 1.0))]) + + opt_result = minimize( + loss_grad, + AB0, + method="L-BFGS-B", + jac=True, + options={ + "gtol": 1e-6, + "ftol": 64 * np.finfo(float).eps, + }, + ) + AB_ = opt_result.x + + # The tuned multiplicative parameter is converted back to the original + # input feature scale. The offset parameter does not need rescaling since + # we did not rescale the outcome variable. + return AB_[0] / scale_constant, AB_[1] + + +class _SigmoidCalibration(RegressorMixin, BaseEstimator): + """Sigmoid regression model. + + Attributes + ---------- + a_ : float + The slope. + + b_ : float + The intercept. + """ + + def fit(self, X, y, sample_weight=None): + """Fit the model using X, y as training data. + + Parameters + ---------- + X : array-like of shape (n_samples,) + Training data. + + y : array-like of shape (n_samples,) + Training target. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + + Returns + ------- + self : object + Returns an instance of self. + """ + X = column_or_1d(X) + y = column_or_1d(y) + X, y = indexable(X, y) + + self.a_, self.b_ = _sigmoid_calibration(X, y, sample_weight) + return self + + def predict(self, T): + """Predict new data by applying the fitted sigmoid. + + Parameters + ---------- + T : array-like of shape (n_samples,) + Data to predict from. + + Returns + ------- + T_ : ndarray of shape (n_samples,) + The predicted data. + """ + T = column_or_1d(T) + return expit(-(self.a_ * T + self.b_)) + + +def _row_max_normalization(data: np.ndarray) -> np.ndarray: + """Normalise the output by subtracting + the per-row maximum element.
+ """ + row_max: np.ndarray = np.max(data, + axis=1, + keepdims=True + ) + + return data - row_max + + +def _softmax_T(predictions: np.ndarray, + temperature: float, + ) -> np.ndarray: + """Softmax function scaled by the inverse temperature + """ + + softmax_T_output: np.ndarray = predictions + softmax_T_output = _row_max_normalization(softmax_T_output) + softmax_T_output /= temperature + softmax_T_output = softmax(softmax_T_output, + axis=1 + ) + softmax_T_output = softmax_T_output.astype(dtype=predictions.dtype) + + return softmax_T_output + + +def _exp_T(predictions: np.ndarray, + temperature: float + ) -> np.ndarray: + """Scale by inverse temperature, and then apply the nature exponential function + """ + + exp_T_output: np.ndarray = predictions + exp_T_output = _row_max_normalization(exp_T_output) + exp_T_output /= temperature + exp_T_output = np.exp(exp_T_output) + + return exp_T_output + + +def _temperature_scaling(predictions: np.ndarray, + labels: np.ndarray, + initial_temperature: float + ) -> float: + """ Minimize the Negative Log Likelihood Loss with respect to Temperature + """ + + def negative_log_likelihood(temperature: float): + """Negative Log Likelihood Loss and its Derivative + with respect to Temperature + """ + + # Losses + losses: np.ndarray = _softmax_T(predictions, + temperature + ) + + # Select the probability of the correct class + losses = losses[np.arange(losses.shape[0]), + labels + ] + + losses = np.log(losses) + + # Derivates with respect to Temperature + exp_T: np.ndarray = _exp_T(predictions, temperature) + exp_T_sum = exp_T.sum(axis=1) + + term_1: np.ndarray = _row_max_normalization(predictions) + term_1 /= temperature ** 2 + term_1 = - term_1[np.arange(term_1.shape[0]), + labels + ] + term_1 *= exp_T_sum + + term_2: np.ndarray = _row_max_normalization(predictions) + term_2 /= temperature ** 2 + term_2 = _row_max_normalization(term_2) + term_2 *= exp_T + term_2 = term_2.sum(axis=1) + + dL_dts: np.ndarray = (term_1 + term_2) / exp_T_sum + + # print(f"{-losses.sum() = }, {-dL_dts.sum() = }") + + return -losses.sum(), -dL_dts.sum() + + temperature_minimizer: minimize = minimize(negative_log_likelihood, + initial_temperature, + method="L-BFGS-B", + bounds=[(1, None)], + jac=True, + options={"gtol": 1e-6, + "ftol": 64 * np.finfo(float).eps, + } + ) + + return temperature_minimizer.x[0] + + +class _TemperatureScaling(): + """Temperature Scaling model. + + Attributes + ---------- + T_ : float + The optimal temperature. + """ + + def __init__(self, + initial_temperature: float = None + ): + + self._initial_temperature: float = initial_temperature + + if initial_temperature is None: + self._initial_temperature = 1.5 + + def fit(self, + X, + y + ): + + self.T_: float = _temperature_scaling(X, y, self._initial_temperature) + + return self + + +@validate_params( + { + "y_true": ["array-like"], + "y_prob": ["array-like"], + "pos_label": [Real, str, "boolean", None], + "n_bins": [Interval(Integral, 1, None, closed="left")], + "strategy": [StrOptions({"uniform", "quantile"})], + }, + prefer_skip_nested_validation=True, +) +def calibration_curve( + y_true, + y_prob, + *, + pos_label=None, + n_bins=5, + strategy="uniform", +): + """Compute true and predicted probabilities for a calibration curve. + + The method assumes the inputs come from a binary classifier, and + discretize the [0, 1] interval into bins. + + Calibration curves may also be referred to as reliability diagrams. + + Read more in the :ref:`User Guide `. 
+ + Parameters + ---------- + y_true : array-like of shape (n_samples,) + True targets. + + y_prob : array-like of shape (n_samples,) + Probabilities of the positive class. + + pos_label : int, float, bool or str, default=None + The label of the positive class. + + .. versionadded:: 1.1 + + n_bins : int, default=5 + Number of bins to discretize the [0, 1] interval. A bigger number + requires more data. Bins with no samples (i.e. without + corresponding values in `y_prob`) will not be returned, thus the + returned arrays may have less than `n_bins` values. + + strategy : {'uniform', 'quantile'}, default='uniform' + Strategy used to define the widths of the bins. + + uniform + The bins have identical widths. + quantile + The bins have the same number of samples and depend on `y_prob`. + + Returns + ------- + prob_true : ndarray of shape (n_bins,) or smaller + The proportion of samples whose class is the positive class, in each + bin (fraction of positives). + + prob_pred : ndarray of shape (n_bins,) or smaller + The mean predicted probability in each bin. + + References + ---------- + Alexandru Niculescu-Mizil and Rich Caruana (2005) Predicting Good + Probabilities With Supervised Learning, in Proceedings of the 22nd + International Conference on Machine Learning (ICML). + See section 4 (Qualitative Analysis of Predictions). + + Examples + -------- + >>> import numpy as np + >>> from sklearn.calibration import calibration_curve + >>> y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1]) + >>> y_pred = np.array([0.1, 0.2, 0.3, 0.4, 0.65, 0.7, 0.8, 0.9, 1.]) + >>> prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=3) + >>> prob_true + array([0. , 0.5, 1. ]) + >>> prob_pred + array([0.2 , 0.525, 0.85 ]) + """ + y_true = column_or_1d(y_true) + y_prob = column_or_1d(y_prob) + check_consistent_length(y_true, y_prob) + pos_label = _check_pos_label_consistency(pos_label, y_true) + + if y_prob.min() < 0 or y_prob.max() > 1: + raise ValueError("y_prob has values outside [0, 1].") + + labels = np.unique(y_true) + if len(labels) > 2: + raise ValueError( + f"Only binary classification is supported. Provided labels {labels}." + ) + y_true = y_true == pos_label + + if strategy == "quantile": # Determine bin edges by distribution of data + quantiles = np.linspace(0, 1, n_bins + 1) + bins = np.percentile(y_prob, quantiles * 100) + elif strategy == "uniform": + bins = np.linspace(0.0, 1.0, n_bins + 1) + else: + raise ValueError( + "Invalid entry to 'strategy' input. Strategy " + "must be either 'quantile' or 'uniform'." + ) + + binids = np.searchsorted(bins[1:-1], y_prob) + + bin_sums = np.bincount(binids, weights=y_prob, minlength=len(bins)) + bin_true = np.bincount(binids, weights=y_true, minlength=len(bins)) + bin_total = np.bincount(binids, minlength=len(bins)) + + nonzero = bin_total != 0 + prob_true = bin_true[nonzero] / bin_total[nonzero] + prob_pred = bin_sums[nonzero] / bin_total[nonzero] + + return prob_true, prob_pred + + +class CalibrationDisplay(_BinaryClassifierCurveDisplayMixin): + """Calibration curve (also known as reliability diagram) visualization. + + It is recommended to use + :func:`~sklearn.calibration.CalibrationDisplay.from_estimator` or + :func:`~sklearn.calibration.CalibrationDisplay.from_predictions` + to create a `CalibrationDisplay`. All parameters are stored as attributes. + + Read more about calibration in the :ref:`User Guide ` and + more about the scikit-learn visualization API in :ref:`visualizations`. + + .. 
versionadded:: 1.0 + + Parameters + ---------- + prob_true : ndarray of shape (n_bins,) + The proportion of samples whose class is the positive class (fraction + of positives), in each bin. + + prob_pred : ndarray of shape (n_bins,) + The mean predicted probability in each bin. + + y_prob : ndarray of shape (n_samples,) + Probability estimates for the positive class, for each sample. + + estimator_name : str, default=None + Name of estimator. If None, the estimator name is not shown. + + pos_label : int, float, bool or str, default=None + The positive class when computing the calibration curve. + By default, `pos_label` is set to `estimators.classes_[1]` when using + `from_estimator` and set to 1 when using `from_predictions`. + + .. versionadded:: 1.1 + + Attributes + ---------- + line_ : matplotlib Artist + Calibration curve. + + ax_ : matplotlib Axes + Axes with calibration curve. + + figure_ : matplotlib Figure + Figure containing the curve. + + See Also + -------- + calibration_curve : Compute true and predicted probabilities for a + calibration curve. + CalibrationDisplay.from_predictions : Plot calibration curve using true + and predicted labels. + CalibrationDisplay.from_estimator : Plot calibration curve using an + estimator and data. + + Examples + -------- + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.calibration import calibration_curve, CalibrationDisplay + >>> X, y = make_classification(random_state=0) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, random_state=0) + >>> clf = LogisticRegression(random_state=0) + >>> clf.fit(X_train, y_train) + LogisticRegression(random_state=0) + >>> y_prob = clf.predict_proba(X_test)[:, 1] + >>> prob_true, prob_pred = calibration_curve(y_test, y_prob, n_bins=10) + >>> disp = CalibrationDisplay(prob_true, prob_pred, y_prob) + >>> disp.plot() + <...> + """ + + def __init__( + self, prob_true, prob_pred, y_prob, *, estimator_name=None, pos_label=None + ): + self.prob_true = prob_true + self.prob_pred = prob_pred + self.y_prob = y_prob + self.estimator_name = estimator_name + self.pos_label = pos_label + + def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): + """Plot visualization. + + Extra keyword arguments will be passed to + :func:`matplotlib.pyplot.plot`. + + Parameters + ---------- + ax : Matplotlib Axes, default=None + Axes object to plot on. If `None`, a new figure and axes is + created. + + name : str, default=None + Name for labeling curve. If `None`, use `estimator_name` if + not `None`, otherwise no labeling is shown. + + ref_line : bool, default=True + If `True`, plots a reference line representing a perfectly + calibrated classifier. + + **kwargs : dict + Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`. + + Returns + ------- + display : :class:`~sklearn.calibration.CalibrationDisplay` + Object that stores computed values. 
+ """ + self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name) + + info_pos_label = ( + f"(Positive class: {self.pos_label})" if self.pos_label is not None else "" + ) + + line_kwargs = {"marker": "s", "linestyle": "-"} + if name is not None: + line_kwargs["label"] = name + line_kwargs.update(**kwargs) + + ref_line_label = "Perfectly calibrated" + existing_ref_line = ref_line_label in self.ax_.get_legend_handles_labels()[1] + if ref_line and not existing_ref_line: + self.ax_.plot([0, 1], [0, 1], "k:", label=ref_line_label) + self.line_ = self.ax_.plot(self.prob_pred, self.prob_true, **line_kwargs)[0] + + # We always have to show the legend for at least the reference line + self.ax_.legend(loc="lower right") + + xlabel = f"Mean predicted probability {info_pos_label}" + ylabel = f"Fraction of positives {info_pos_label}" + self.ax_.set(xlabel=xlabel, ylabel=ylabel) + + return self + + @classmethod + def from_estimator( + cls, + estimator, + X, + y, + *, + n_bins=5, + strategy="uniform", + pos_label=None, + name=None, + ref_line=True, + ax=None, + **kwargs, + ): + """Plot calibration curve using a binary classifier and data. + + A calibration curve, also known as a reliability diagram, uses inputs + from a binary classifier and plots the average predicted probability + for each bin against the fraction of positive classes, on the + y-axis. + + Extra keyword arguments will be passed to + :func:`matplotlib.pyplot.plot`. + + Read more about calibration in the :ref:`User Guide ` and + more about the scikit-learn visualization API in :ref:`visualizations`. + + .. versionadded:: 1.0 + + Parameters + ---------- + estimator : estimator instance + Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline` + in which the last estimator is a classifier. The classifier must + have a :term:`predict_proba` method. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Input values. + + y : array-like of shape (n_samples,) + Binary target values. + + n_bins : int, default=5 + Number of bins to discretize the [0, 1] interval into when + calculating the calibration curve. A bigger number requires more + data. + + strategy : {'uniform', 'quantile'}, default='uniform' + Strategy used to define the widths of the bins. + + - `'uniform'`: The bins have identical widths. + - `'quantile'`: The bins have the same number of samples and depend + on predicted probabilities. + + pos_label : int, float, bool or str, default=None + The positive class when computing the calibration curve. + By default, `estimators.classes_[1]` is considered as the + positive class. + + .. versionadded:: 1.1 + + name : str, default=None + Name for labeling curve. If `None`, the name of the estimator is + used. + + ref_line : bool, default=True + If `True`, plots a reference line representing a perfectly + calibrated classifier. + + ax : matplotlib axes, default=None + Axes object to plot on. If `None`, a new figure and axes is + created. + + **kwargs : dict + Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`. + + Returns + ------- + display : :class:`~sklearn.calibration.CalibrationDisplay`. + Object that stores computed values. + + See Also + -------- + CalibrationDisplay.from_predictions : Plot calibration curve using true + and predicted labels. 
+ + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.calibration import CalibrationDisplay + >>> X, y = make_classification(random_state=0) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... X, y, random_state=0) + >>> clf = LogisticRegression(random_state=0) + >>> clf.fit(X_train, y_train) + LogisticRegression(random_state=0) + >>> disp = CalibrationDisplay.from_estimator(clf, X_test, y_test) + >>> plt.show() + """ + y_prob, pos_label, name = cls._validate_and_get_response_values( + estimator, + X, + y, + response_method="predict_proba", + pos_label=pos_label, + name=name, + ) + + return cls.from_predictions( + y, + y_prob, + n_bins=n_bins, + strategy=strategy, + pos_label=pos_label, + name=name, + ref_line=ref_line, + ax=ax, + **kwargs, + ) + + @classmethod + def from_predictions( + cls, + y_true, + y_prob, + *, + n_bins=5, + strategy="uniform", + pos_label=None, + name=None, + ref_line=True, + ax=None, + **kwargs, + ): + """Plot calibration curve using true labels and predicted probabilities. + + Calibration curve, also known as reliability diagram, uses inputs + from a binary classifier and plots the average predicted probability + for each bin against the fraction of positive classes, on the + y-axis. + + Extra keyword arguments will be passed to + :func:`matplotlib.pyplot.plot`. + + Read more about calibration in the :ref:`User Guide ` and + more about the scikit-learn visualization API in :ref:`visualizations`. + + .. versionadded:: 1.0 + + Parameters + ---------- + y_true : array-like of shape (n_samples,) + True labels. + + y_prob : array-like of shape (n_samples,) + The predicted probabilities of the positive class. + + n_bins : int, default=5 + Number of bins to discretize the [0, 1] interval into when + calculating the calibration curve. A bigger number requires more + data. + + strategy : {'uniform', 'quantile'}, default='uniform' + Strategy used to define the widths of the bins. + + - `'uniform'`: The bins have identical widths. + - `'quantile'`: The bins have the same number of samples and depend + on predicted probabilities. + + pos_label : int, float, bool or str, default=None + The positive class when computing the calibration curve. + By default `pos_label` is set to 1. + + .. versionadded:: 1.1 + + name : str, default=None + Name for labeling curve. + + ref_line : bool, default=True + If `True`, plots a reference line representing a perfectly + calibrated classifier. + + ax : matplotlib axes, default=None + Axes object to plot on. If `None`, a new figure and axes is + created. + + **kwargs : dict + Keyword arguments to be passed to :func:`matplotlib.pyplot.plot`. + + Returns + ------- + display : :class:`~sklearn.calibration.CalibrationDisplay`. + Object that stores computed values. + + See Also + -------- + CalibrationDisplay.from_estimator : Plot calibration curve using an + estimator and data. + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> from sklearn.datasets import make_classification + >>> from sklearn.model_selection import train_test_split + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.calibration import CalibrationDisplay + >>> X, y = make_classification(random_state=0) + >>> X_train, X_test, y_train, y_test = train_test_split( + ... 
X, y, random_state=0) + >>> clf = LogisticRegression(random_state=0) + >>> clf.fit(X_train, y_train) + LogisticRegression(random_state=0) + >>> y_prob = clf.predict_proba(X_test)[:, 1] + >>> disp = CalibrationDisplay.from_predictions(y_test, y_prob) + >>> plt.show() + """ + pos_label_validated, name = cls._validate_from_predictions_params( + y_true, y_prob, sample_weight=None, pos_label=pos_label, name=name + ) + + prob_true, prob_pred = calibration_curve( + y_true, y_prob, n_bins=n_bins, strategy=strategy, pos_label=pos_label + ) + + disp = cls( + prob_true=prob_true, + prob_pred=prob_pred, + y_prob=y_prob, + estimator_name=name, + pos_label=pos_label_validated, + ) + return disp.plot(ax=ax, ref_line=ref_line, **kwargs) From 7e2a444cacecd0178b318ba98b51260003406be1 Mon Sep 17 00:00:00 2001 From: virchan Date: Thu, 9 May 2024 18:44:05 -0700 Subject: [PATCH 11/17] Added the `_TemperatureScaling` class and associated helper functions. --- sklearn/calibration_temperature.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py index 12f287482488d..fa1a4f1f0eb1c 100644 --- a/sklearn/calibration_temperature.py +++ b/sklearn/calibration_temperature.py @@ -254,7 +254,7 @@ class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator) HasMethods(["fit", "decision_function"]), None, ], - "method": [StrOptions({"isotonic", "sigmoid"})], + "method": [StrOptions({"isotonic", "sigmoid", "temperature"})], "cv": ["cv_object", StrOptions({"prefit"})], "n_jobs": [Integral, None], "ensemble": ["boolean"], @@ -665,9 +665,10 @@ def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None): calibrator.fit(this_pred, Y[:, class_idx], sample_weight) calibrators.append(calibrator) - elif method == 'Temperature_scaling': + elif method == 'temperature': calibrator = _TemperatureScaling() - calibrator.fit(predictions, Y, sample_weight) + calibrator.fit(predictions, Y) + calibrators.append(calibrator) pipeline = _CalibratedClassifier(clf, calibrators, method=method, classes=classes) return pipeline @@ -760,7 +761,7 @@ def predict_proba(self, X): ) # Temperature Scaling method - elif self.method == 'temperature_scaling': + elif self.method == 'temperature': assert len(self.calibrators) == 1, 'Temperature scaling should consist of one calibrator.' proba = self.calibrators[0].predict(predictions) @@ -1057,10 +1058,26 @@ def fit(self, y ): - self.T_: float = _temperature_scaling(X, y, self._initial_temperature) + self.T_: float = _temperature_scaling(np.log(X), y, self._initial_temperature) return self + def predict(self, X): + """Predict new data by temperature-scaled softmax. + + Parameters + ---------- + X : array-like of shape (n_samples, n_classes) + Data to predict from. + + Returns + ------- + X_ : ndarray of shape (n_samples,) + The predicted data. + """ + + return _softmax_T(np.log(X), self.T_) + @validate_params( { From 7acc77906cd0b6c8d134589a10195d4f2523ea77 Mon Sep 17 00:00:00 2001 From: virchan Date: Tue, 18 Jun 2024 10:03:31 -0700 Subject: [PATCH 12/17] - Converted variables into lowercase to reduce warning messages. - Modified the `negative_log_likelihood` function to allow labels to be one-hot.
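Because `negative_log_likelihood` returns the analytic derivative with respect to the temperature alongside the loss, it can be sanity-checked against a finite difference. For p = softmax(z / T) and integer targets y, the derivative is dNLL/dT = (1 / T^2) * sum_i (z[i, y_i] - sum_j p[i, j] * z[i, j]). A small self-contained check, using an illustrative helper rather than the code in this patch:

    import numpy as np

    def nll_and_grad(z, y, t):
        # Loss and analytic d(loss)/dt for p = softmax(z / t).
        s = z / t
        s -= s.max(axis=1, keepdims=True)  # row-max shift; cancels in the gradient
        p = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)
        rows = np.arange(len(y))
        loss = -np.log(p[rows, y]).sum()
        grad = ((z[rows, y] - (p * z).sum(axis=1)) / t**2).sum()
        return loss, grad

    rng = np.random.default_rng(0)
    z = 4.0 * rng.normal(size=(64, 3))  # deliberately overconfident logits
    y = rng.integers(0, 3, size=64)
    t, eps = 1.5, 1e-6
    fd = (nll_and_grad(z, y, t + eps)[0] - nll_and_grad(z, y, t - eps)[0]) / (2 * eps)
    assert np.isclose(nll_and_grad(z, y, t)[1], fd, rtol=1e-4)

One-hot labels reduce to the integer-target case through np.argmax(labels, axis=1), which is the same conversion this patch applies inside `negative_log_likelihood`.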
--- sklearn/calibration_temperature.py | 61 +++++++++++++----------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py index fa1a4f1f0eb1c..48b9f2b7a1dae 100644 --- a/sklearn/calibration_temperature.py +++ b/sklearn/calibration_temperature.py @@ -63,7 +63,7 @@ ) -class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): +class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): """Probability calibration with isotonic regression or logistic regression. This class uses cross-validation to both estimate the parameters of a @@ -944,35 +944,33 @@ def _row_max_normalization(data: np.ndarray) -> np.ndarray: return data - row_max -def _softmax_T(predictions: np.ndarray, +def _softmax_t(predictions: np.ndarray, temperature: float, ) -> np.ndarray: """Softmax function scaled by the inverse temperature """ - softmax_T_output: np.ndarray = predictions - softmax_T_output = _row_max_normalization(softmax_T_output) - softmax_T_output /= temperature - softmax_T_output = softmax(softmax_T_output, - axis=1 - ) - softmax_T_output = softmax_T_output.astype(dtype=predictions.dtype) + softmax_t_output: np.ndarray = predictions + softmax_t_output = _row_max_normalization(softmax_t_output) + softmax_t_output /= temperature + softmax_t_output = softmax(softmax_t_output, axis=1) + softmax_t_output = softmax_t_output.astype(dtype=predictions.dtype) - return softmax_T_output + return softmax_t_output -def _exp_T(predictions: np.ndarray, +def _exp_t(predictions: np.ndarray, temperature: float ) -> np.ndarray: """Scale by inverse temperature, and then apply the nature exponential function """ - exp_T_output: np.ndarray = predictions - exp_T_output = _row_max_normalization(exp_T_output) - exp_T_output /= temperature - exp_T_output = np.exp(exp_T_output) + exp_t_output: np.ndarray = predictions + exp_t_output = _row_max_normalization(exp_t_output) + exp_t_output /= temperature + exp_t_output = np.exp(exp_t_output) - return exp_T_output + return exp_t_output def _temperature_scaling(predictions: np.ndarray, @@ -987,43 +985,38 @@ def negative_log_likelihood(temperature: float): with respect to Temperature """ - # Losses - losses: np.ndarray = _softmax_T(predictions, - temperature - ) + # Initiate the Losses + losses: np.ndarray = _softmax_t(predictions, temperature) + class_indices: np.ndarray = np.argmax(labels, axis=1) # Select the probability of the correct class - losses = losses[np.arange(losses.shape[0]), - labels - ] + losses = losses[np.arange(losses.shape[0]), class_indices] losses = np.log(losses) - # Derivates with respect to Temperature - exp_T: np.ndarray = _exp_T(predictions, temperature) - exp_T_sum = exp_T.sum(axis=1) + # Derivatives with respect to Temperature + exp_t: np.ndarray = _exp_t(predictions, temperature) + exp_t_sum = exp_t.sum(axis=1) term_1: np.ndarray = _row_max_normalization(predictions) term_1 /= temperature ** 2 - term_1 = - term_1[np.arange(term_1.shape[0]), - labels - ] - term_1 *= exp_T_sum + term_1 = - term_1[np.arange(term_1.shape[0]), class_indices] + term_1 *= exp_t_sum term_2: np.ndarray = _row_max_normalization(predictions) term_2 /= temperature ** 2 term_2 = _row_max_normalization(term_2) - term_2 *= exp_T + term_2 *= exp_t term_2 = term_2.sum(axis=1) - dL_dts: np.ndarray = (term_1 + term_2) / exp_T_sum + dL_dts: np.ndarray = (term_1 + term_2) / exp_t_sum # print(f"{-losses.sum() = }, {-dL_dts.sum() = }") return -losses.sum(), 
-dL_dts.sum() temperature_minimizer: minimize = minimize(negative_log_likelihood, - initial_temperature, + np.array([initial_temperature]), method="L-BFGS-B", bounds=[(1, None)], jac=True, @@ -1076,7 +1069,7 @@ def predict(self, X): The predicted data. """ - return _softmax_T(np.log(X), self.T_) + return _softmax_t(np.log(X), self.T_) @validate_params( From 25a1bf2324ff6b2326368fe5d7920bf4bef19ff4 Mon Sep 17 00:00:00 2001 From: virchan Date: Tue, 18 Jun 2024 11:19:50 -0700 Subject: [PATCH 13/17] - Converted variables into lowercase to reduce warning messages. - Modified the `negative_log_likelihood` function to allow labels to be one-hot. - Added the `_temperature_scaling_test.py` file. --- sklearn/_temperature_scaling_test.py | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 sklearn/_temperature_scaling_test.py diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py new file mode 100644 index 0000000000000..991ade2b066db --- /dev/null +++ b/sklearn/_temperature_scaling_test.py @@ -0,0 +1,40 @@ +''' +This file is created to test if the custom 'TemperatureScaling' class runs properly, +and serves as proof of work for the changes made to the scikit-learn repository. +Reference: https://github.com/scikit-learn/scikit-learn/issues/28574 + +The file also includes examples related to developing a temperature scaling method +for probability calibration in multi-class classification. +''' + +from sklearn.calibration_temperature import CalibratedClassifierCV_test +from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn.svm import SVC +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier + +# Load the Iris dataset +X, y = datasets.load_iris(return_X_y=True) +X_train, X_calib, y_train, y_calib = train_test_split(X, y) + +# Load the following classifiers for testing +SV_classifier: SVC = SVC(probability=True) +Logistic_classifier: LogisticRegression = LogisticRegression() +Tree_classifier: DecisionTreeClassifier = DecisionTreeClassifier() + +# Initiate the calibrators for the classifiers +SVC_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(SV_classifier, cv=3, method='temperature') +Logistic_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Logistic_classifier, cv=3, method='temperature') +Tree_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Tree_classifier, cv=3, method='temperature') + +# Fit all classifier-calibrator pairs +SVC_scaled.fit(X_train,y_train) +Logistic_scaled.fit(X_train,y_train) +Tree_scaled.fit(X_train,y_train) + +print("Optimal Temperatures For Each Classifiers") +print(f"- SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_}") +print(f"- Logistic: {Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_}") +print(f"- Decision Tree: {Tree_scaled.calibrated_classifiers_[0].calibrators[0].T_}") + From 26f458d3f8d181b7fd9814f99dd61834ffbbfd9f Mon Sep 17 00:00:00 2001 From: virchan Date: Wed, 19 Jun 2024 13:03:44 -0700 Subject: [PATCH 14/17] Added doc-strings to temperature-scaling-related functions. 
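To make the behaviour these doc-strings describe concrete: temperature scaling only rescales
confidence, replacing softmax(z) by softmax(z / T) for a single scalar T fitted by maximum
likelihood, so the argmax prediction is unchanged. A minimal stand-alone sketch follows;
it is illustrative only, and `fit_temperature` is a hypothetical name, not part of the patch.

    import numpy as np
    from scipy.optimize import minimize
    from scipy.special import softmax

    def fit_temperature(logits, y, t0=1.5):
        # Find T >= 1 minimising the negative log likelihood of softmax(logits / T).
        def nll(t):
            probs = softmax(logits / t[0], axis=1)
            return -np.log(probs[np.arange(len(y)), y]).sum()
        return minimize(nll, np.array([t0]), method="L-BFGS-B", bounds=[(1.0, None)]).x[0]

    rng = np.random.default_rng(0)
    logits = 5.0 * rng.normal(size=(200, 3))   # deliberately over-confident scores
    y = rng.integers(0, 3, size=200)
    T = fit_temperature(logits, y)
    print(T)                                   # T > 1 softens the probabilities
    # Calibration never changes the predicted class, only the confidence:
    assert (softmax(logits / T, axis=1).argmax(1) == softmax(logits, axis=1).argmax(1)).all()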
---
 sklearn/_temperature_scaling_test.py |   2 +
 sklearn/calibration_temperature.py   | 145 +++++++++++++++++++++++----
 2 files changed, 130 insertions(+), 17 deletions(-)

diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py
index 991ade2b066db..66c3ffdc56f91 100644
--- a/sklearn/_temperature_scaling_test.py
+++ b/sklearn/_temperature_scaling_test.py
@@ -33,6 +33,8 @@
 Logistic_scaled.fit(X_train,y_train)
 Tree_scaled.fit(X_train,y_train)

+print(f" Initial temperatureSVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0]._initial_temperature}")
+
 print("Optimal Temperatures For Each Classifiers")
 print(f"- SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_}")
 print(f"- Logistic: {Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_}")
diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py
index 48b9f2b7a1dae..20b211a411b18 100644
--- a/sklearn/calibration_temperature.py
+++ b/sklearn/calibration_temperature.py
@@ -64,7 +64,7 @@

 class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
-    """Probability calibration with isotonic regression or logistic regression.
+    """Probability calibration with isotonic regression, logistic regression, or temperature scaling (in-progress).

     This class uses cross-validation to both estimate the parameters of a
     classifier and subsequently calibrate a classifier. With default
@@ -98,11 +98,12 @@ class CalibratedClassifierCV_test(ClassifierMixin, MetaEstimatorMixin, BaseEstim

         .. versionadded:: 1.2

-    method : {'sigmoid', 'isotonic'}, default='sigmoid'
+    method : {'sigmoid', 'isotonic', 'temperature'}, default='sigmoid'
         The method to use for calibration. Can be 'sigmoid' which
-        corresponds to Platt's method (i.e. a logistic regression model) or
-        'isotonic' which is a non-parametric approach. It is not advised to
-        use isotonic calibration with too few calibration samples
+        corresponds to Platt's method (i.e. a logistic regression model),
+        'isotonic' which is a non-parametric approach, or 'temperature'
+        which corresponds to the temperature scaling method. It is not
+        advised to use isotonic calibration with too few calibration samples
         ``(<<1000)`` since it tends to overfit.

     cv : int, cross-validation generator, iterable or "prefit", \
             default=None
@@ -211,6 +212,9 @@
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005

+    .. [5] On Calibration of Modern Neural Networks,
+           C. Guo, G. Pleiss, Y. Sun & K. Q. Weinberger, ICML 2017
+
     Examples
     --------
     >>> from sklearn.datasets import make_classification
@@ -933,9 +937,20 @@ def predict(self, T):

 def _row_max_normalization(data: np.ndarray) -> np.ndarray:
-    """Normalise the output by subtracting
-    the per-row maximum element.
+    """Normalize the input data by subtracting the maximum value of each row.
+
+    Parameters
+    ----------
+    data : np.ndarray
+        The input data array of shape (n_samples, n_classes).
+
+    Returns
+    -------
+    np.ndarray
+        A 2D array of the same shape as `data` where each row has been normalized
+        by subtracting the maximum value of that row.
""" + row_max: np.ndarray = np.max(data, axis=1, keepdims=True @@ -947,7 +962,27 @@ def _row_max_normalization(data: np.ndarray) -> np.ndarray: def _softmax_t(predictions: np.ndarray, temperature: float, ) -> np.ndarray: - """Softmax function scaled by the inverse temperature + """Compute the temperature-scaled softmax of the input predictions. + + Parameters + ---------- + predictions : np.ndarray + The input predictions array of shape (n_sample, n_classes). + + temperature : float + The temperature parameter for scaling. + + Returns + ------- + np.ndarray + A 2D array of the same shape as `predictions` containing the temperature-scaled + softmax probabilities. + + Notes + ----- + - This function internally normalizes the predictions by subtracting the row-wise + maximum to improve numerical stability before scaling by the temperature. + - The softmax computation is done along the last axis of the input predictions. """ softmax_t_output: np.ndarray = predictions @@ -962,7 +997,27 @@ def _softmax_t(predictions: np.ndarray, def _exp_t(predictions: np.ndarray, temperature: float ) -> np.ndarray: - """Scale by inverse temperature, and then apply the nature exponential function + """Scale predictions by the inverse temperature and apply the exponential function. + + Parameters + ---------- + predictions : np.ndarray + The input predictions array of shape (n_samples, n_classes). + + temperature : float + The temperature parameter for scaling. + + Returns + ------- + np.ndarray + A 2D array of the same shape as `predictions` containing the scaled and + exponentiated values. + + Notes + ----- + - This function internally normalizes the predictions by subtracting the row-wise + maximum to improve numerical stability before scaling by the temperature and + applying the exponential function. """ exp_t_output: np.ndarray = predictions @@ -977,12 +1032,46 @@ def _temperature_scaling(predictions: np.ndarray, labels: np.ndarray, initial_temperature: float ) -> float: - """ Minimize the Negative Log Likelihood Loss with respect to Temperature + """Probability Calibration with temperature scaling (Guo-Pleiss-Sun-Weinberger 2017). + + Parameters + ---------- + predictions : ndarray of shape (n_samples,) + The decision function or predict proba for the samples. + + labels : ndarray of shape (n_samples, n_classes) + One-hot encoded true labels for the samples. + + initial_temperature : float + Initial temperature value to start the optimisation + + Returns + ------- + float + The optimised temperature parameter for probability calibration, with a + value in the range [1, infinity). + + References + ---------- + Guo, Pleiss, Sun & Weinberger, "On Calibration of Modern Neural Networks" """ def negative_log_likelihood(temperature: float): - """Negative Log Likelihood Loss and its Derivative - with respect to Temperature + """ Compute the negative log likelihood loss and its derivative + with respect to temperature. + + Parameters + ---------- + temperature : float + The current temperature value during optimisation. + + Returns + ------- + float + The negative log likelihood loss. + float + The derivative of the negative log likelihood loss with respect to + temperature. 
""" # Initiate the Losses @@ -1009,11 +1098,11 @@ def negative_log_likelihood(temperature: float): term_2 *= exp_t term_2 = term_2.sum(axis=1) - dL_dts: np.ndarray = (term_1 + term_2) / exp_t_sum + dlosses_dts: np.ndarray = (term_1 + term_2) / exp_t_sum # print(f"{-losses.sum() = }, {-dL_dts.sum() = }") - return -losses.sum(), -dL_dts.sum() + return -losses.sum(), -dlosses_dts.sum() temperature_minimizer: minimize = minimize(negative_log_likelihood, np.array([initial_temperature]), @@ -1033,8 +1122,14 @@ class _TemperatureScaling(): Attributes ---------- + + _initial_temperature: float or None + Initial temperature value to start the optimisation. + If None, the it is set to 1.5. + + T_ : float - The optimal temperature. + The optimised temperature for probability calibration. """ def __init__(self, @@ -1050,6 +1145,21 @@ def fit(self, X, y ): + """Fit the model using X, y as training data. + + Parameters + ---------- + X : array-like of shape (n_samples, n_classes) + Training data. + + y : array-like of shape (n_samples, n_classes) + Training target. + + Returns + ------- + self : object + Returns an instance of self. + """ self.T_: float = _temperature_scaling(np.log(X), y, self._initial_temperature) @@ -1061,11 +1171,12 @@ def predict(self, X): Parameters ---------- X : array-like of shape (n_samples, n_classes) - Data to predict from. + The decision function or predict proba for the samples + Returns ------- - X_ : ndarray of shape (n_samples,) + ndarray of shape (n_samples, n_classes) The predicted data. """ From 6160ee18418a58ac6070f532a6bf85fbe6b571d0 Mon Sep 17 00:00:00 2001 From: virchan Date: Fri, 12 Jul 2024 14:56:20 -0700 Subject: [PATCH 15/17] Modified the `.fit()` method of temperature scaling. Now it can handle outputs from `decision_function` function. Also added the `_additive_smoothing` function to avoid numerical instability when applying logarithm. --- sklearn/_temperature_scaling_test.py | 27 ++++++-- sklearn/calibration_temperature.py | 96 ++++++++++++++++++++++------ 2 files changed, 99 insertions(+), 24 deletions(-) diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py index 66c3ffdc56f91..92982de2f4344 100644 --- a/sklearn/_temperature_scaling_test.py +++ b/sklearn/_temperature_scaling_test.py @@ -5,6 +5,7 @@ The file also includes examples related to developing a temperature scaling method for probability calibration in multi-class classification. 
+ ''' from sklearn.calibration_temperature import CalibratedClassifierCV_test @@ -22,21 +23,37 @@ SV_classifier: SVC = SVC(probability=True) Logistic_classifier: LogisticRegression = LogisticRegression() Tree_classifier: DecisionTreeClassifier = DecisionTreeClassifier() +# compare_classifier = DecisionTreeClassifier() # Initiate the calibrators for the classifiers -SVC_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(SV_classifier, cv=3, method='temperature') -Logistic_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Logistic_classifier, cv=3, method='temperature') -Tree_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Tree_classifier, cv=3, method='temperature') +SVC_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(SV_classifier, + cv=3, + method='temperature' + ) +Logistic_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Logistic_classifier, + cv=3, + method='temperature' + ) +Tree_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Tree_classifier, + cv=3, + method='temperature' + ) # Fit all classifier-calibrator pairs SVC_scaled.fit(X_train,y_train) Logistic_scaled.fit(X_train,y_train) Tree_scaled.fit(X_train,y_train) +# compare_classifier.fit(X_train, y_train) -print(f" Initial temperatureSVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0]._initial_temperature}") - +# print(f" Initial temperatureSVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0]._initial_temperature}") print("Optimal Temperatures For Each Classifiers") print(f"- SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_}") print(f"- Logistic: {Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_}") print(f"- Decision Tree: {Tree_scaled.calibrated_classifiers_[0].calibrators[0].T_}") +print("Printing calibrated probabilities...") +print(f"{SVC_scaled.predict_proba((X_calib)) = }") +print(f"{Logistic_scaled.predict_proba((X_calib))=}") +print(f"{Tree_scaled.predict_proba(X_calib)=}") +# print(f"{compare_classifier.predict_proba(X_calib)=}") +print(f"{y_calib=}") diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py index 20b211a411b18..04fe0c7a92f67 100644 --- a/sklearn/calibration_temperature.py +++ b/sklearn/calibration_temperature.py @@ -463,6 +463,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): self.n_features_in_ = first_clf.n_features_in_ if hasattr(first_clf, "feature_names_in_"): self.feature_names_in_ = first_clf.feature_names_in_ + return self def predict_proba(self, X): @@ -959,6 +960,38 @@ def _row_max_normalization(data: np.ndarray) -> np.ndarray: return data - row_max +def _additive_smoothing(probabilities: np.ndarray) -> np.ndarray: + """Additive Smoothing. + Modify the original probability array to avoid numerical instability when + applying logarithm. + + This method adjusts probabilities to avoid exact 0 or 1 values by using + a fixed transformation. The transformation ensures that probabilities + are within a safe range for logarithmic operations. + + For more details, refer to: + https://en.wikipedia.org/wiki/Additive_smoothing + + Parameters + ---------- + probabilities : np.ndarray + The input 2D numpy array of probabilities. + + Returns + ------- + np.ndarray + The smoothed probability array, with values adjusted to avoid 0 and 1. 
+ """ + + n_classes: int = probabilities.shape[1] + + smooth_probabilities: np.ndarray = (probabilities * (n_classes - 1) + 0.5) / n_classes + + smooth_probabilities = smooth_probabilities.astype(dtype=probabilities.dtype) + + return smooth_probabilities + + def _softmax_t(predictions: np.ndarray, temperature: float, ) -> np.ndarray: @@ -977,16 +1010,9 @@ def _softmax_t(predictions: np.ndarray, np.ndarray A 2D array of the same shape as `predictions` containing the temperature-scaled softmax probabilities. - - Notes - ----- - - This function internally normalizes the predictions by subtracting the row-wise - maximum to improve numerical stability before scaling by the temperature. - - The softmax computation is done along the last axis of the input predictions. """ softmax_t_output: np.ndarray = predictions - softmax_t_output = _row_max_normalization(softmax_t_output) softmax_t_output /= temperature softmax_t_output = softmax(softmax_t_output, axis=1) softmax_t_output = softmax_t_output.astype(dtype=predictions.dtype) @@ -1012,16 +1038,10 @@ def _exp_t(predictions: np.ndarray, np.ndarray A 2D array of the same shape as `predictions` containing the scaled and exponentiated values. - - Notes - ----- - - This function internally normalizes the predictions by subtracting the row-wise - maximum to improve numerical stability before scaling by the temperature and - applying the exponential function. """ exp_t_output: np.ndarray = predictions - exp_t_output = _row_max_normalization(exp_t_output) + # exp_t_output = _row_max_normalization(exp_t_output) exp_t_output /= temperature exp_t_output = np.exp(exp_t_output) @@ -1087,14 +1107,16 @@ def negative_log_likelihood(temperature: float): exp_t: np.ndarray = _exp_t(predictions, temperature) exp_t_sum = exp_t.sum(axis=1) - term_1: np.ndarray = _row_max_normalization(predictions) + # term_1: np.ndarray = _row_max_normalization(predictions) + term_1: np.ndarray = _additive_smoothing(predictions) term_1 /= temperature ** 2 term_1 = - term_1[np.arange(term_1.shape[0]), class_indices] term_1 *= exp_t_sum - term_2: np.ndarray = _row_max_normalization(predictions) + # term_2: np.ndarray = _row_max_normalization(predictions) + term_2: np.ndarray = _additive_smoothing(predictions) term_2 /= temperature ** 2 - term_2 = _row_max_normalization(term_2) + # term_2 = _row_max_normalization(term_2) term_2 *= exp_t term_2 = term_2.sum(axis=1) @@ -1117,6 +1139,31 @@ def negative_log_likelihood(temperature: float): return temperature_minimizer.x[0] +def _is_predict_proba(X: np.ndarray) -> bool: + """ + Helper function to check if the input array contains probabilities. + + Specifically, it checks if all rows in the array sum to 1 and if all + entries are floats between 0 and 1. + + Parameters: + ---------- + np.ndarray: The input 2D numpy array. + + Returns: + -------- + bool: True if the array is likely to be probabilities, False if it is likely to be logits. + """ + + # Check if all entries are between 0 and 1 + entries_zero_to_one: bool = np.all((X >= 0) & (X <= 1)) + + # Check if each row sums approximately to 1 + row_sums_to_one: bool = np.all(np.isclose(np.sum(X, axis=1), 1.0)) + + return entries_zero_to_one and row_sums_to_one + + class _TemperatureScaling(): """Temperature Scaling model. @@ -1161,7 +1208,14 @@ def fit(self, Returns an instance of self. 
""" - self.T_: float = _temperature_scaling(np.log(X), y, self._initial_temperature) + # If X are outputs of `decision_function` + # i.e., logits (e.g., SVC(probability=False) ) + if _is_predict_proba(X): + self.T_ = _temperature_scaling(np.log(_additive_smoothing(X)), y, self._initial_temperature) + + # If X are outputs of `predict_proba` + else: + self.T_ = _temperature_scaling(X, y, self._initial_temperature) return self @@ -1180,7 +1234,11 @@ def predict(self, X): The predicted data. """ - return _softmax_t(np.log(X), self.T_) + if _is_predict_proba(X): + return _softmax_t(np.log(_additive_smoothing(X)), self.T_) + + else: + return _softmax_t(X, self.T_) @validate_params( From 67afee5d665ceb30938aba5aa41866d840d8a16a Mon Sep 17 00:00:00 2001 From: virchan Date: Wed, 17 Jul 2024 14:59:03 -0700 Subject: [PATCH 16/17] 1. Modified the `_TemperatureScaling` class to adept `sample_weight` argument. 2. Modified the `_temperature_scaling` function. The initial temperature is now 1.0, and the optimised temperature is in interval [1e-2, inf). 3. Revise doc-strings. --- sklearn/_temperature_scaling_test.py | 24 ++--- sklearn/calibration_temperature.py | 127 ++++++++++++++------------- 2 files changed, 81 insertions(+), 70 deletions(-) diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py index 92982de2f4344..51cfbea37e33f 100644 --- a/sklearn/_temperature_scaling_test.py +++ b/sklearn/_temperature_scaling_test.py @@ -1,11 +1,15 @@ ''' This file is created to test if the custom 'TemperatureScaling' class runs properly, and serves as proof of work for the changes made to the scikit-learn repository. -Reference: https://github.com/scikit-learn/scikit-learn/issues/28574 The file also includes examples related to developing a temperature scaling method for probability calibration in multi-class classification. + +References: +----------- + .. [1] https://github.com/scikit-learn/scikit-learn/issues/28574. Original issue + on Github. 
''' from sklearn.calibration_temperature import CalibratedClassifierCV_test @@ -20,10 +24,10 @@ X_train, X_calib, y_train, y_calib = train_test_split(X, y) # Load the following classifiers for testing -SV_classifier: SVC = SVC(probability=True) +SV_classifier: SVC = SVC(probability=False) Logistic_classifier: LogisticRegression = LogisticRegression() Tree_classifier: DecisionTreeClassifier = DecisionTreeClassifier() -# compare_classifier = DecisionTreeClassifier() + # Initiate the calibrators for the classifiers SVC_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(SV_classifier, @@ -31,7 +35,7 @@ method='temperature' ) Logistic_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Logistic_classifier, - cv=3, + cv=7, method='temperature' ) Tree_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(Tree_classifier, @@ -39,21 +43,21 @@ method='temperature' ) + # Fit all classifier-calibrator pairs SVC_scaled.fit(X_train,y_train) Logistic_scaled.fit(X_train,y_train) Tree_scaled.fit(X_train,y_train) # compare_classifier.fit(X_train, y_train) -# print(f" Initial temperatureSVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0]._initial_temperature}") print("Optimal Temperatures For Each Classifiers") -print(f"- SVC: {SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_}") -print(f"- Logistic: {Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_}") -print(f"- Decision Tree: {Tree_scaled.calibrated_classifiers_[0].calibrators[0].T_}") +print(f"{SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") +print(f"{Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") +print(f"{Tree_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") print("Printing calibrated probabilities...") -print(f"{SVC_scaled.predict_proba((X_calib)) = }") -print(f"{Logistic_scaled.predict_proba((X_calib))=}") +print(f"{SVC_scaled.predict_proba(X_calib) = }") +print(f"{Logistic_scaled.predict_proba(X_calib) = }") print(f"{Tree_scaled.predict_proba(X_calib)=}") # print(f"{compare_classifier.predict_proba(X_calib)=}") print(f"{y_calib=}") diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py index 04fe0c7a92f67..b7c656842f559 100644 --- a/sklearn/calibration_temperature.py +++ b/sklearn/calibration_temperature.py @@ -672,7 +672,7 @@ def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None): elif method == 'temperature': calibrator = _TemperatureScaling() - calibrator.fit(predictions, Y) + calibrator.fit(predictions, Y, sample_weight) calibrators.append(calibrator) pipeline = _CalibratedClassifier(clf, calibrators, method=method, classes=classes) @@ -943,12 +943,12 @@ def _row_max_normalization(data: np.ndarray) -> np.ndarray: Parameters ---------- data : np.ndarray - The input data array of shape (n_samples, n_classes). + The input array. Returns ------- np.ndarray - A 2D array of the same shape as `data` where each row has been normalized + An array of the same shape as `data` where each row has been normalized by subtracting the maximum value of that row. """ @@ -974,13 +974,13 @@ def _additive_smoothing(probabilities: np.ndarray) -> np.ndarray: Parameters ---------- - probabilities : np.ndarray - The input 2D numpy array of probabilities. + probabilities : np.ndarray + The input 2D numpy array of probabilities. Returns ------- - np.ndarray - The smoothed probability array, with values adjusted to avoid 0 and 1. + np.ndarray + The smoothed probability array, with values adjusted to avoid 0 and 1. 
""" n_classes: int = probabilities.shape[1] @@ -992,15 +992,15 @@ def _additive_smoothing(probabilities: np.ndarray) -> np.ndarray: return smooth_probabilities -def _softmax_t(predictions: np.ndarray, +def _softmax_t(X: np.ndarray, temperature: float, ) -> np.ndarray: - """Compute the temperature-scaled softmax of the input predictions. + """Compute the temperature-scaled softmax of the input array. Parameters ---------- - predictions : np.ndarray - The input predictions array of shape (n_sample, n_classes). + X : np.ndarray + The input array. temperature : float The temperature parameter for scaling. @@ -1008,27 +1008,28 @@ def _softmax_t(predictions: np.ndarray, Returns ------- np.ndarray - A 2D array of the same shape as `predictions` containing the temperature-scaled + An array of the same shape as the input containing the temperature-scaled softmax probabilities. """ - softmax_t_output: np.ndarray = predictions + softmax_t_output: np.ndarray = X + softmax_t_output = _row_max_normalization(softmax_t_output) softmax_t_output /= temperature softmax_t_output = softmax(softmax_t_output, axis=1) - softmax_t_output = softmax_t_output.astype(dtype=predictions.dtype) + softmax_t_output = softmax_t_output.astype(dtype=X.dtype) return softmax_t_output -def _exp_t(predictions: np.ndarray, +def _exp_t(X: np.ndarray, temperature: float ) -> np.ndarray: """Scale predictions by the inverse temperature and apply the exponential function. Parameters ---------- - predictions : np.ndarray - The input predictions array of shape (n_samples, n_classes). + X : np.ndarray + The input array. temperature : float The temperature parameter for scaling. @@ -1036,21 +1037,23 @@ def _exp_t(predictions: np.ndarray, Returns ------- np.ndarray - A 2D array of the same shape as `predictions` containing the scaled and + An array of the same shape as the input containing the temperature-scaled and exponentiated values. """ - exp_t_output: np.ndarray = predictions - # exp_t_output = _row_max_normalization(exp_t_output) + exp_t_output: np.ndarray = X + exp_t_output = _row_max_normalization(exp_t_output) exp_t_output /= temperature exp_t_output = np.exp(exp_t_output) + exp_t_output = exp_t_output.astype(dtype=X.dtype) return exp_t_output def _temperature_scaling(predictions: np.ndarray, labels: np.ndarray, - initial_temperature: float + sample_weight=None, + initial_temperature: float = 1.0 ) -> float: """Probability Calibration with temperature scaling (Guo-Pleiss-Sun-Weinberger 2017). @@ -1062,8 +1065,11 @@ def _temperature_scaling(predictions: np.ndarray, labels : ndarray of shape (n_samples, n_classes) One-hot encoded true labels for the samples. - initial_temperature : float - Initial temperature value to start the optimisation + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + + initial_temperature : float, default=1.0 + Initial temperature value to start the optimisation. Returns ------- @@ -1089,6 +1095,7 @@ def negative_log_likelihood(temperature: float): ------- float The negative log likelihood loss. + float The derivative of the negative log likelihood loss with respect to temperature. 
@@ -1103,33 +1110,38 @@ def negative_log_likelihood(temperature: float): losses = np.log(losses) + # Apply sample weight + if sample_weight is not None: + losses *= sample_weight + # Derivatives with respect to Temperature exp_t: np.ndarray = _exp_t(predictions, temperature) exp_t_sum = exp_t.sum(axis=1) - # term_1: np.ndarray = _row_max_normalization(predictions) - term_1: np.ndarray = _additive_smoothing(predictions) + term_1: np.ndarray = predictions + term_1 = _row_max_normalization(predictions) term_1 /= temperature ** 2 term_1 = - term_1[np.arange(term_1.shape[0]), class_indices] term_1 *= exp_t_sum - # term_2: np.ndarray = _row_max_normalization(predictions) - term_2: np.ndarray = _additive_smoothing(predictions) + term_2: np.ndarray = predictions + term_2 = _row_max_normalization(term_2) term_2 /= temperature ** 2 - # term_2 = _row_max_normalization(term_2) term_2 *= exp_t term_2 = term_2.sum(axis=1) dlosses_dts: np.ndarray = (term_1 + term_2) / exp_t_sum - # print(f"{-losses.sum() = }, {-dL_dts.sum() = }") + # Apply sample weight + if sample_weight is not None: + dlosses_dts *= sample_weight return -losses.sum(), -dlosses_dts.sum() temperature_minimizer: minimize = minimize(negative_log_likelihood, np.array([initial_temperature]), method="L-BFGS-B", - bounds=[(1, None)], + bounds=[(1e-2, None)], jac=True, options={"gtol": 1e-6, "ftol": 64 * np.finfo(float).eps, @@ -1140,19 +1152,20 @@ def negative_log_likelihood(temperature: float): def _is_predict_proba(X: np.ndarray) -> bool: - """ - Helper function to check if the input array contains probabilities. - + """Helper function to check if the input array contains probabilities. Specifically, it checks if all rows in the array sum to 1 and if all entries are floats between 0 and 1. Parameters: ---------- - np.ndarray: The input 2D numpy array. + X : np.ndarray + The input numpy array of shape (n_samples, n_classes). Returns: -------- - bool: True if the array is likely to be probabilities, False if it is likely to be logits. + bool + True if the array is likely to be output of `predict_proba`, + False if it is likely to be output of `decision_function`. """ # Check if all entries are between 0 and 1 @@ -1164,43 +1177,36 @@ def _is_predict_proba(X: np.ndarray) -> bool: return entries_zero_to_one and row_sums_to_one -class _TemperatureScaling(): +class _TemperatureScaling(RegressorMixin, BaseEstimator): """Temperature Scaling model. Attributes ---------- - - _initial_temperature: float or None - Initial temperature value to start the optimisation. - If None, the it is set to 1.5. - - T_ : float The optimised temperature for probability calibration. + Available after the calibrator is fitted. """ - def __init__(self, - initial_temperature: float = None - ): - - self._initial_temperature: float = initial_temperature - - if initial_temperature is None: - self._initial_temperature = 1.5 def fit(self, X, - y + y, + sample_weight=None ): """Fit the model using X, y as training data. Parameters ---------- - X : array-like of shape (n_samples, n_classes) + X : np.ndarray + array-like of shape (n_samples, n_classes). Training data. - y : array-like of shape (n_samples, n_classes) - Training target. + y : np.ndarray + array-like of shape (n_samples, n_classes) + Training labels. + + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. 
        Returns
         -------
         self : object
             Returns an instance of self.
         """

         # If X are outputs of `predict_proba`, i.e., probabilities
         # (e.g., SVC(probability=True)), map them to log space first.
         if _is_predict_proba(X):
-            self.T_ = _temperature_scaling(np.log(_additive_smoothing(X)), y, self._initial_temperature)
+            self.T_ = _temperature_scaling(np.log(_additive_smoothing(X)), y, sample_weight)

         # If X are outputs of `decision_function`,
         # i.e., logits (e.g., SVC(probability=False)), use them directly.
         else:
-            self.T_ = _temperature_scaling(X, y, self._initial_temperature)
+            self.T_ = _temperature_scaling(X, y, sample_weight)

         return self

From e72adfec27cf806fad2bf44c23c54f7deff5980c Mon Sep 17 00:00:00 2001
From: virchan
Date: Thu, 18 Jul 2024 10:50:41 -0700
Subject: [PATCH 17/17] Modified `_temperature_scaling_test.py` and
 `calibration_temperature.py` for the first PR draft.

---
 sklearn/_temperature_scaling_test.py | 30 ++++++++++++++++----------
 sklearn/calibration_temperature.py   |  6 +++---
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/sklearn/_temperature_scaling_test.py b/sklearn/_temperature_scaling_test.py
index 51cfbea37e33f..e0a95523ed58b 100644
--- a/sklearn/_temperature_scaling_test.py
+++ b/sklearn/_temperature_scaling_test.py
@@ -8,8 +8,11 @@

 References:
 -----------
-    .. [1] https://github.com/scikit-learn/scikit-learn/issues/28574. Original issue
-        on Github.
+    .. [1] https://github.com/scikit-learn/scikit-learn/issues/28574. Original issue
+        on Github.
+
+    .. [2] On Calibration of Modern Neural Networks,
+        C. Guo, G. Pleiss, Y. Sun & K. Q. Weinberger, ICML 2017
 '''

 from sklearn.calibration_temperature import CalibratedClassifierCV_test
@@ -19,17 +22,20 @@
 from sklearn.linear_model import LogisticRegression
 from sklearn.tree import DecisionTreeClassifier

-# Load the Iris dataset
+# We demonstrate with the Iris dataset, because
+# it is small, multi-class, and bundled with scikit-learn.
 X, y = datasets.load_iris(return_X_y=True)
 X_train, X_calib, y_train, y_calib = train_test_split(X, y)

 # Load the following classifiers for testing
+# - Support vector classifier
+# - Logistic regressor
+# - Decision tree classifier
 SV_classifier: SVC = SVC(probability=False)
 Logistic_classifier: LogisticRegression = LogisticRegression()
 Tree_classifier: DecisionTreeClassifier = DecisionTreeClassifier()

-# Initiate the calibrators for the classifiers
+# Initiate the temperature scaling calibrators for the classifiers
 SVC_scaled: CalibratedClassifierCV_test = CalibratedClassifierCV_test(SV_classifier,
                                                                       cv=3,
                                                                       method='temperature'
                                                                       )
@@ -43,21 +49,23 @@
                                                                       method='temperature'
                                                                       )

-# Fit all classifier-calibrator pairs
+# Calibrate the classifiers with temperature scaling.
+# The calibrators are trained with the output of
+# `decision_function` for the support vector classifier
+# and logistic regression, while they are trained with
+# `predict_proba` for the decision tree classifier.
SVC_scaled.fit(X_train,y_train) Logistic_scaled.fit(X_train,y_train) Tree_scaled.fit(X_train,y_train) -# compare_classifier.fit(X_train, y_train) print("Optimal Temperatures For Each Classifiers") print(f"{SVC_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") print(f"{Logistic_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") print(f"{Tree_scaled.calibrated_classifiers_[0].calibrators[0].T_=}") +print('\n') print("Printing calibrated probabilities...") -print(f"{SVC_scaled.predict_proba(X_calib) = }") -print(f"{Logistic_scaled.predict_proba(X_calib) = }") +print(f"{SVC_scaled.predict_proba(X_calib)=}") +print(f"{Logistic_scaled.predict_proba(X_calib)=}") print(f"{Tree_scaled.predict_proba(X_calib)=}") -# print(f"{compare_classifier.predict_proba(X_calib)=}") print(f"{y_calib=}") diff --git a/sklearn/calibration_temperature.py b/sklearn/calibration_temperature.py index b7c656842f559..19c1b430d2175 100644 --- a/sklearn/calibration_temperature.py +++ b/sklearn/calibration_temperature.py @@ -585,7 +585,7 @@ def _fit_classifier_calibrator_pair( test : ndarray, shape (n_test_indices,) Indices of the testing subset. - method : {'sigmoid', 'isotonic'} + method : {'sigmoid', 'isotonic', 'temperature'} Method to use for calibration. classes : ndarray, shape (n_classes,) @@ -646,7 +646,7 @@ def _fit_calibrator(clf, predictions, y, classes, method, sample_weight=None): classes : ndarray, shape (n_classes,) All the prediction classes. - method : {'sigmoid', 'isotonic'} + method : {'sigmoid', 'isotonic', 'temperature'} The method to use for calibration. sample_weight : ndarray, shape (n_samples,), default=None @@ -696,7 +696,7 @@ class _CalibratedClassifier: classes : array-like of shape (n_classes,) All the prediction classes. - method : {'sigmoid', 'isotonic'}, default='sigmoid' + method : {'sigmoid', 'isotonic', 'temperature'}, default='sigmoid' The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method or 'isotonic' which is a non-parametric approach based on isotonic regression.
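As a closing illustration of the end-to-end effect, held-out log loss before and after
temperature scaling can be compared using only stable public scikit-learn and SciPy APIs,
without the in-progress `CalibratedClassifierCV_test` class. This is a hedged sketch of
the same technique, assuming `decision_function` logits are available:

    import numpy as np
    from scipy.optimize import minimize
    from scipy.special import softmax
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import log_loss
    from sklearn.model_selection import train_test_split

    X, y = load_iris(return_X_y=True)
    X_train, X_calib, y_train, y_calib = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

    # Fit a single temperature on the held-out logits by minimising log loss,
    # with the same [1e-2, inf) bound as [PATCH 16/17].
    logits = clf.decision_function(X_calib)
    nll = lambda t: log_loss(y_calib, softmax(logits / t[0], axis=1), labels=clf.classes_)
    T = minimize(nll, np.array([1.0]), method="L-BFGS-B", bounds=[(1e-2, None)]).x[0]

    print("optimal temperature:", T)
    print("log loss before:", log_loss(y_calib, clf.predict_proba(X_calib)))
    print("log loss after: ", log_loss(y_calib, softmax(logits / T, axis=1)))

For a model that is already well calibrated, T stays close to 1 and the two log losses
are nearly identical; for over-confident models, T > 1 and the calibrated loss drops.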