From 13f38a15e72e0414a3ecc305b52cb58eb83ca127 Mon Sep 17 00:00:00 2001 From: vitaliset Date: Sun, 9 Jun 2024 01:40:31 -0300 Subject: [PATCH 1/2] moving _curve_scorer class to metrics --- sklearn/metrics/_classification_threshold.py | 122 ++++++++++++++++++ .../tests/test_classification_threshold.py | 100 ++++++++++++++ .../_classification_threshold.py | 122 +----------------- .../tests/test_classification_threshold.py | 94 +------------- 4 files changed, 227 insertions(+), 211 deletions(-) create mode 100644 sklearn/metrics/_classification_threshold.py create mode 100644 sklearn/metrics/tests/test_classification_threshold.py diff --git a/sklearn/metrics/_classification_threshold.py b/sklearn/metrics/_classification_threshold.py new file mode 100644 index 0000000000000..520f75eb5269a --- /dev/null +++ b/sklearn/metrics/_classification_threshold.py @@ -0,0 +1,122 @@ +from numbers import Integral + +import numpy as np + +from ._scorer import _BaseScorer + + +def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label): + """Threshold `y_score` and return the associated class labels.""" + if pos_label is None: + map_thresholded_score_to_label = np.array([0, 1]) + else: + pos_label_idx = np.flatnonzero(classes == pos_label)[0] + neg_label_idx = np.flatnonzero(classes != pos_label)[0] + map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx]) + + return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]] + + +class _CurveScorer(_BaseScorer): + """Scorer taking a continuous response and output a score for each threshold. + + Parameters + ---------- + score_func : callable + The score function to use. It will be called as + `score_func(y_true, y_pred, **kwargs)`. + + sign : int + Either 1 or -1 to returns the score with `sign * score_func(estimator, X, y)`. + Thus, `sign` defined if higher scores are better or worse. + + kwargs : dict + Additional parameters to pass to the score function. + + thresholds : int or array-like + Related to the number of decision thresholds for which we want to compute the + score. If an integer, it will be used to generate `thresholds` thresholds + uniformly distributed between the minimum and maximum predicted scores. If an + array-like, it will be used as the thresholds. + + response_method : str + The method to call on the estimator to get the response values. + """ + + def __init__(self, score_func, sign, kwargs, thresholds, response_method): + super().__init__( + score_func=score_func, + sign=sign, + kwargs=kwargs, + response_method=response_method, + ) + self._thresholds = thresholds + + @classmethod + def from_scorer(cls, scorer, response_method, thresholds): + """Create a continuous scorer from a normal scorer.""" + instance = cls( + score_func=scorer._score_func, + sign=scorer._sign, + response_method=response_method, + thresholds=thresholds, + kwargs=scorer._kwargs, + ) + # transfer the metadata request + instance._metadata_request = scorer._get_metadata_request() + return instance + + def _score(self, method_caller, estimator, X, y_true, **kwargs): + """Evaluate predicted target values for X relative to y_true. + + Parameters + ---------- + method_caller : callable + Returns predictions given an estimator, method name, and other + arguments, potentially caching results. + + estimator : object + Trained estimator to use for scoring. + + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Test data that will be fed to estimator.predict. 
+ + y_true : array-like of shape (n_samples,) + Gold standard target values for X. + + **kwargs : dict + Other parameters passed to the scorer. Refer to + :func:`set_score_request` for more details. + + Returns + ------- + scores : ndarray of shape (thresholds,) + The scores associated to each threshold. + + potential_thresholds : ndarray of shape (thresholds,) + The potential thresholds used to compute the scores. + """ + pos_label = self._get_pos_label() + y_score = method_caller( + estimator, self._response_method, X, pos_label=pos_label + ) + + scoring_kwargs = {**self._kwargs, **kwargs} + if isinstance(self._thresholds, Integral): + potential_thresholds = np.linspace( + np.min(y_score), np.max(y_score), self._thresholds + ) + else: + potential_thresholds = np.asarray(self._thresholds) + score_thresholds = [ + self._sign + * self._score_func( + y_true, + _threshold_scores_to_class_labels( + y_score, th, estimator.classes_, pos_label + ), + **scoring_kwargs, + ) + for th in potential_thresholds + ] + return np.array(score_thresholds), potential_thresholds diff --git a/sklearn/metrics/tests/test_classification_threshold.py b/sklearn/metrics/tests/test_classification_threshold.py new file mode 100644 index 0000000000000..e25f61ec803fe --- /dev/null +++ b/sklearn/metrics/tests/test_classification_threshold.py @@ -0,0 +1,100 @@ +import pytest + +from sklearn.datasets import make_classification +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import ( + balanced_accuracy_score, + recall_score, +) +from sklearn.metrics._classification_threshold import _CurveScorer + + +def test_curve_scorer(): + """Check the behaviour of the `_CurveScorer` class.""" + X, y = make_classification(random_state=0) + estimator = LogisticRegression().fit(X, y) + curve_scorer = _CurveScorer( + balanced_accuracy_score, + sign=1, + response_method="predict_proba", + thresholds=10, + kwargs={}, + ) + scores, thresholds = curve_scorer(estimator, X, y) + + assert thresholds.shape == scores.shape + # check that the thresholds are probabilities with extreme values close to 0 and 1. + # they are not exactly 0 and 1 because they are the extremum of the + # `estimator.predict_proba(X)` values. 
+ assert 0 <= thresholds.min() <= 0.01 + assert 0.99 <= thresholds.max() <= 1 + # balanced accuracy should be between 0.5 and 1 when it is not adjusted + assert 0.5 <= scores.min() <= 1 + + # check that passing kwargs to the scorer works + curve_scorer = _CurveScorer( + balanced_accuracy_score, + sign=1, + response_method="predict_proba", + thresholds=10, + kwargs={"adjusted": True}, + ) + scores, thresholds = curve_scorer(estimator, X, y) + + # balanced accuracy should be between 0.5 and 1 when it is not adjusted + assert 0 <= scores.min() <= 0.5 + + # check that we can inverse the sign of the score when dealing with `neg_*` scorer + curve_scorer = _CurveScorer( + balanced_accuracy_score, + sign=-1, + response_method="predict_proba", + thresholds=10, + kwargs={"adjusted": True}, + ) + scores, thresholds = curve_scorer(estimator, X, y) + + assert all(scores <= 0) + + +def test_curve_scorer_pos_label(global_random_seed): + """Check that we propagate properly the `pos_label` parameter to the scorer.""" + n_samples = 30 + X, y = make_classification( + n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed + ) + estimator = LogisticRegression().fit(X, y) + + curve_scorer = _CurveScorer( + recall_score, + sign=1, + response_method="predict_proba", + thresholds=10, + kwargs={"pos_label": 1}, + ) + scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y) + + curve_scorer = _CurveScorer( + recall_score, + sign=1, + response_method="predict_proba", + thresholds=10, + kwargs={"pos_label": 0}, + ) + scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y) + + # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal. + assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all() + # The min-max range for the thresholds is defined by the probabilities of the + # `pos_label` class (the column of `predict_proba`). + y_pred = estimator.predict_proba(X) + assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0]) + assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0]) + assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1]) + assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1]) + + # The recall cannot be negative and `pos_label=1` should have a higher recall + # since there is less samples to be considered. 
+ assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min() + assert scores_pos_label_0.max() == pytest.approx(1.0) + assert scores_pos_label_1.max() == pytest.approx(1.0) diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index 1d221d3388434..5d49aeb7a5839 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -15,7 +15,10 @@ check_scoring, get_scorer_names, ) -from ..metrics._scorer import _BaseScorer +from ..metrics._classification_threshold import ( + _CurveScorer, + _threshold_scores_to_class_labels, +) from ..utils import _safe_indexing from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions from ..utils._response import _get_response_values_binary @@ -54,18 +57,6 @@ def check(self): return check -def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label): - """Threshold `y_score` and return the associated class labels.""" - if pos_label is None: - map_thresholded_score_to_label = np.array([0, 1]) - else: - pos_label_idx = np.flatnonzero(classes == pos_label)[0] - neg_label_idx = np.flatnonzero(classes != pos_label)[0] - map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx]) - - return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]] - - class BaseThresholdClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): """Base class for binary classifiers that set a non-default decision threshold. @@ -426,111 +417,6 @@ def get_metadata_routing(self): return router -class _CurveScorer(_BaseScorer): - """Scorer taking a continuous response and output a score for each threshold. - - Parameters - ---------- - score_func : callable - The score function to use. It will be called as - `score_func(y_true, y_pred, **kwargs)`. - - sign : int - Either 1 or -1 to returns the score with `sign * score_func(estimator, X, y)`. - Thus, `sign` defined if higher scores are better or worse. - - kwargs : dict - Additional parameters to pass to the score function. - - thresholds : int or array-like - Related to the number of decision thresholds for which we want to compute the - score. If an integer, it will be used to generate `thresholds` thresholds - uniformly distributed between the minimum and maximum predicted scores. If an - array-like, it will be used as the thresholds. - - response_method : str - The method to call on the estimator to get the response values. - """ - - def __init__(self, score_func, sign, kwargs, thresholds, response_method): - super().__init__( - score_func=score_func, - sign=sign, - kwargs=kwargs, - response_method=response_method, - ) - self._thresholds = thresholds - - @classmethod - def from_scorer(cls, scorer, response_method, thresholds): - """Create a continuous scorer from a normal scorer.""" - instance = cls( - score_func=scorer._score_func, - sign=scorer._sign, - response_method=response_method, - thresholds=thresholds, - kwargs=scorer._kwargs, - ) - # transfer the metadata request - instance._metadata_request = scorer._get_metadata_request() - return instance - - def _score(self, method_caller, estimator, X, y_true, **kwargs): - """Evaluate predicted target values for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - estimator : object - Trained estimator to use for scoring. 
- - X : {array-like, sparse matrix} of shape (n_samples, n_features) - Test data that will be fed to estimator.predict. - - y_true : array-like of shape (n_samples,) - Gold standard target values for X. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - Returns - ------- - scores : ndarray of shape (thresholds,) - The scores associated to each threshold. - - potential_thresholds : ndarray of shape (thresholds,) - The potential thresholds used to compute the scores. - """ - pos_label = self._get_pos_label() - y_score = method_caller( - estimator, self._response_method, X, pos_label=pos_label - ) - - scoring_kwargs = {**self._kwargs, **kwargs} - if isinstance(self._thresholds, Integral): - potential_thresholds = np.linspace( - np.min(y_score), np.max(y_score), self._thresholds - ) - else: - potential_thresholds = np.asarray(self._thresholds) - score_thresholds = [ - self._sign - * self._score_func( - y_true, - _threshold_scores_to_class_labels( - y_score, th, estimator.classes_, pos_label - ), - **scoring_kwargs, - ) - for th in potential_thresholds - ] - return np.array(score_thresholds), potential_thresholds - - def _fit_and_score_over_thresholds( classifier, X, diff --git a/sklearn/model_selection/tests/test_classification_threshold.py b/sklearn/model_selection/tests/test_classification_threshold.py index 77c4c20e99ef2..2f6b49f695f05 100644 --- a/sklearn/model_selection/tests/test_classification_threshold.py +++ b/sklearn/model_selection/tests/test_classification_threshold.py @@ -17,15 +17,14 @@ f1_score, fbeta_score, make_scorer, - recall_score, ) +from sklearn.metrics._classification_threshold import _CurveScorer from sklearn.model_selection import ( FixedThresholdClassifier, StratifiedShuffleSplit, TunedThresholdClassifierCV, ) from sklearn.model_selection._classification_threshold import ( - _CurveScorer, _fit_and_score_over_thresholds, ) from sklearn.pipeline import make_pipeline @@ -40,97 +39,6 @@ ) -def test_curve_scorer(): - """Check the behaviour of the `_CurveScorer` class.""" - X, y = make_classification(random_state=0) - estimator = LogisticRegression().fit(X, y) - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - assert thresholds.shape == scores.shape - # check that the thresholds are probabilities with extreme values close to 0 and 1. - # they are not exactly 0 and 1 because they are the extremum of the - # `estimator.predict_proba(X)` values. 
- assert 0 <= thresholds.min() <= 0.01 - assert 0.99 <= thresholds.max() <= 1 - # balanced accuracy should be between 0.5 and 1 when it is not adjusted - assert 0.5 <= scores.min() <= 1 - - # check that passing kwargs to the scorer works - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"adjusted": True}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - # balanced accuracy should be between 0.5 and 1 when it is not adjusted - assert 0 <= scores.min() <= 0.5 - - # check that we can inverse the sign of the score when dealing with `neg_*` scorer - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=-1, - response_method="predict_proba", - thresholds=10, - kwargs={"adjusted": True}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - assert all(scores <= 0) - - -def test_curve_scorer_pos_label(global_random_seed): - """Check that we propagate properly the `pos_label` parameter to the scorer.""" - n_samples = 30 - X, y = make_classification( - n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed - ) - estimator = LogisticRegression().fit(X, y) - - curve_scorer = _CurveScorer( - recall_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"pos_label": 1}, - ) - scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y) - - curve_scorer = _CurveScorer( - recall_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"pos_label": 0}, - ) - scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y) - - # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal. - assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all() - # The min-max range for the thresholds is defined by the probabilities of the - # `pos_label` class (the column of `predict_proba`). - y_pred = estimator.predict_proba(X) - assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0]) - assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0]) - assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1]) - assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1]) - - # The recall cannot be negative and `pos_label=1` should have a higher recall - # since there is less samples to be considered. 
- assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min() - assert scores_pos_label_0.max() == pytest.approx(1.0) - assert scores_pos_label_1.max() == pytest.approx(1.0) - - def test_fit_and_score_over_thresholds_curve_scorers(): """Check that `_fit_and_score_over_thresholds` returns thresholds in ascending order for the different accepted curve scorers.""" From 63204155e350607058f7ac3f3056b03e9e0bc810 Mon Sep 17 00:00:00 2001 From: vitaliset Date: Tue, 2 Jul 2024 23:39:42 -0300 Subject: [PATCH 2/2] moving files from metrics._classification_threshold to metrics._scorer --- sklearn/metrics/_classification_threshold.py | 122 ------------------ sklearn/metrics/_scorer.py | 120 +++++++++++++++++ .../tests/test_classification_threshold.py | 100 -------------- sklearn/metrics/tests/test_score_objects.py | 92 +++++++++++++ .../_classification_threshold.py | 2 +- .../tests/test_classification_threshold.py | 2 +- 6 files changed, 214 insertions(+), 224 deletions(-) delete mode 100644 sklearn/metrics/_classification_threshold.py delete mode 100644 sklearn/metrics/tests/test_classification_threshold.py diff --git a/sklearn/metrics/_classification_threshold.py b/sklearn/metrics/_classification_threshold.py deleted file mode 100644 index 520f75eb5269a..0000000000000 --- a/sklearn/metrics/_classification_threshold.py +++ /dev/null @@ -1,122 +0,0 @@ -from numbers import Integral - -import numpy as np - -from ._scorer import _BaseScorer - - -def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label): - """Threshold `y_score` and return the associated class labels.""" - if pos_label is None: - map_thresholded_score_to_label = np.array([0, 1]) - else: - pos_label_idx = np.flatnonzero(classes == pos_label)[0] - neg_label_idx = np.flatnonzero(classes != pos_label)[0] - map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx]) - - return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]] - - -class _CurveScorer(_BaseScorer): - """Scorer taking a continuous response and output a score for each threshold. - - Parameters - ---------- - score_func : callable - The score function to use. It will be called as - `score_func(y_true, y_pred, **kwargs)`. - - sign : int - Either 1 or -1 to returns the score with `sign * score_func(estimator, X, y)`. - Thus, `sign` defined if higher scores are better or worse. - - kwargs : dict - Additional parameters to pass to the score function. - - thresholds : int or array-like - Related to the number of decision thresholds for which we want to compute the - score. If an integer, it will be used to generate `thresholds` thresholds - uniformly distributed between the minimum and maximum predicted scores. If an - array-like, it will be used as the thresholds. - - response_method : str - The method to call on the estimator to get the response values. 
- """ - - def __init__(self, score_func, sign, kwargs, thresholds, response_method): - super().__init__( - score_func=score_func, - sign=sign, - kwargs=kwargs, - response_method=response_method, - ) - self._thresholds = thresholds - - @classmethod - def from_scorer(cls, scorer, response_method, thresholds): - """Create a continuous scorer from a normal scorer.""" - instance = cls( - score_func=scorer._score_func, - sign=scorer._sign, - response_method=response_method, - thresholds=thresholds, - kwargs=scorer._kwargs, - ) - # transfer the metadata request - instance._metadata_request = scorer._get_metadata_request() - return instance - - def _score(self, method_caller, estimator, X, y_true, **kwargs): - """Evaluate predicted target values for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - estimator : object - Trained estimator to use for scoring. - - X : {array-like, sparse matrix} of shape (n_samples, n_features) - Test data that will be fed to estimator.predict. - - y_true : array-like of shape (n_samples,) - Gold standard target values for X. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - Returns - ------- - scores : ndarray of shape (thresholds,) - The scores associated to each threshold. - - potential_thresholds : ndarray of shape (thresholds,) - The potential thresholds used to compute the scores. - """ - pos_label = self._get_pos_label() - y_score = method_caller( - estimator, self._response_method, X, pos_label=pos_label - ) - - scoring_kwargs = {**self._kwargs, **kwargs} - if isinstance(self._thresholds, Integral): - potential_thresholds = np.linspace( - np.min(y_score), np.max(y_score), self._thresholds - ) - else: - potential_thresholds = np.asarray(self._thresholds) - score_thresholds = [ - self._sign - * self._score_func( - y_true, - _threshold_scores_to_class_labels( - y_score, th, estimator.classes_, pos_label - ), - **scoring_kwargs, - ) - for th in potential_thresholds - ] - return np.array(score_thresholds), potential_thresholds diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c1a916aa0b5f3..b2f4f7178940b 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -23,8 +23,11 @@ from collections import Counter from functools import partial from inspect import signature +from numbers import Integral from traceback import format_exc +import numpy as np + from ..base import is_regressor from ..utils import Bunch from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params @@ -1064,3 +1067,120 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T "If no scoring is specified, the estimator passed should " "have a 'score' method. The estimator %r does not." 
            % estimator
        )
+
+
+def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label):
+    """Threshold `y_score` and return the associated class labels."""
+    if pos_label is None:
+        map_thresholded_score_to_label = np.array([0, 1])
+    else:
+        pos_label_idx = np.flatnonzero(classes == pos_label)[0]
+        neg_label_idx = np.flatnonzero(classes != pos_label)[0]
+        map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx])
+
+    return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]]
+
+
+class _CurveScorer(_BaseScorer):
+    """Scorer taking a continuous response and outputting a score per threshold.
+
+    Parameters
+    ----------
+    score_func : callable
+        The score function to use. It will be called as
+        `score_func(y_true, y_pred, **kwargs)`.
+
+    sign : int
+        Either 1 or -1 to return the score as `sign * score_func(estimator, X, y)`.
+        Thus, `sign` defines whether higher scores are better or worse.
+
+    kwargs : dict
+        Additional parameters to pass to the score function.
+
+    thresholds : int or array-like
+        Related to the number of decision thresholds for which we want to compute the
+        score. If an integer, it will be used to generate `thresholds` thresholds
+        uniformly distributed between the minimum and maximum predicted scores. If an
+        array-like, it will be used as the thresholds.
+
+    response_method : str
+        The method to call on the estimator to get the response values.
+    """
+
+    def __init__(self, score_func, sign, kwargs, thresholds, response_method):
+        super().__init__(
+            score_func=score_func,
+            sign=sign,
+            kwargs=kwargs,
+            response_method=response_method,
+        )
+        self._thresholds = thresholds
+
+    @classmethod
+    def from_scorer(cls, scorer, response_method, thresholds):
+        """Create a continuous scorer from a normal scorer."""
+        instance = cls(
+            score_func=scorer._score_func,
+            sign=scorer._sign,
+            response_method=response_method,
+            thresholds=thresholds,
+            kwargs=scorer._kwargs,
+        )
+        # transfer the metadata request
+        instance._metadata_request = scorer._get_metadata_request()
+        return instance
+
+    def _score(self, method_caller, estimator, X, y_true, **kwargs):
+        """Evaluate predicted target values for X relative to y_true.
+
+        Parameters
+        ----------
+        method_caller : callable
+            Returns predictions given an estimator, method name, and other
+            arguments, potentially caching results.
+
+        estimator : object
+            Trained estimator to use for scoring.
+
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Test data that will be fed to the `response_method` of `estimator`.
+
+        y_true : array-like of shape (n_samples,)
+            Gold standard target values for X.
+
+        **kwargs : dict
+            Other parameters passed to the scorer. Refer to
+            :func:`set_score_request` for more details.
+
+        Returns
+        -------
+        scores : ndarray of shape (thresholds,)
+            The scores associated with each threshold.
+
+        potential_thresholds : ndarray of shape (thresholds,)
+            The potential thresholds used to compute the scores.
+ """ + pos_label = self._get_pos_label() + y_score = method_caller( + estimator, self._response_method, X, pos_label=pos_label + ) + + scoring_kwargs = {**self._kwargs, **kwargs} + if isinstance(self._thresholds, Integral): + potential_thresholds = np.linspace( + np.min(y_score), np.max(y_score), self._thresholds + ) + else: + potential_thresholds = np.asarray(self._thresholds) + score_thresholds = [ + self._sign + * self._score_func( + y_true, + _threshold_scores_to_class_labels( + y_score, th, estimator.classes_, pos_label + ), + **scoring_kwargs, + ) + for th in potential_thresholds + ] + return np.array(score_thresholds), potential_thresholds diff --git a/sklearn/metrics/tests/test_classification_threshold.py b/sklearn/metrics/tests/test_classification_threshold.py deleted file mode 100644 index e25f61ec803fe..0000000000000 --- a/sklearn/metrics/tests/test_classification_threshold.py +++ /dev/null @@ -1,100 +0,0 @@ -import pytest - -from sklearn.datasets import make_classification -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import ( - balanced_accuracy_score, - recall_score, -) -from sklearn.metrics._classification_threshold import _CurveScorer - - -def test_curve_scorer(): - """Check the behaviour of the `_CurveScorer` class.""" - X, y = make_classification(random_state=0) - estimator = LogisticRegression().fit(X, y) - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - assert thresholds.shape == scores.shape - # check that the thresholds are probabilities with extreme values close to 0 and 1. - # they are not exactly 0 and 1 because they are the extremum of the - # `estimator.predict_proba(X)` values. - assert 0 <= thresholds.min() <= 0.01 - assert 0.99 <= thresholds.max() <= 1 - # balanced accuracy should be between 0.5 and 1 when it is not adjusted - assert 0.5 <= scores.min() <= 1 - - # check that passing kwargs to the scorer works - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"adjusted": True}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - # balanced accuracy should be between 0.5 and 1 when it is not adjusted - assert 0 <= scores.min() <= 0.5 - - # check that we can inverse the sign of the score when dealing with `neg_*` scorer - curve_scorer = _CurveScorer( - balanced_accuracy_score, - sign=-1, - response_method="predict_proba", - thresholds=10, - kwargs={"adjusted": True}, - ) - scores, thresholds = curve_scorer(estimator, X, y) - - assert all(scores <= 0) - - -def test_curve_scorer_pos_label(global_random_seed): - """Check that we propagate properly the `pos_label` parameter to the scorer.""" - n_samples = 30 - X, y = make_classification( - n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed - ) - estimator = LogisticRegression().fit(X, y) - - curve_scorer = _CurveScorer( - recall_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"pos_label": 1}, - ) - scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y) - - curve_scorer = _CurveScorer( - recall_score, - sign=1, - response_method="predict_proba", - thresholds=10, - kwargs={"pos_label": 0}, - ) - scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y) - - # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal. 
- assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all() - # The min-max range for the thresholds is defined by the probabilities of the - # `pos_label` class (the column of `predict_proba`). - y_pred = estimator.predict_proba(X) - assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0]) - assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0]) - assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1]) - assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1]) - - # The recall cannot be negative and `pos_label=1` should have a higher recall - # since there is less samples to be considered. - assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min() - assert scores_pos_label_0.max() == pytest.approx(1.0) - assert scores_pos_label_1.max() == pytest.approx(1.0) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index bfe8f57f92265..4fc295a10d9fa 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -43,6 +43,7 @@ from sklearn.metrics import cluster as cluster_module from sklearn.metrics._scorer import ( _check_multimetric_scoring, + _CurveScorer, _MultimetricScorer, _PassthroughScorer, _Scorer, @@ -1598,3 +1599,94 @@ def test_metadata_routing_multimetric_metadata_routing(enable_metadata_routing): multimetric_scorer = _MultimetricScorer(scorers={"acc": get_scorer("accuracy")}) with config_context(enable_metadata_routing=enable_metadata_routing): multimetric_scorer(estimator, X, y) + + +def test_curve_scorer(): + """Check the behaviour of the `_CurveScorer` class.""" + X, y = make_classification(random_state=0) + estimator = LogisticRegression().fit(X, y) + curve_scorer = _CurveScorer( + balanced_accuracy_score, + sign=1, + response_method="predict_proba", + thresholds=10, + kwargs={}, + ) + scores, thresholds = curve_scorer(estimator, X, y) + + assert thresholds.shape == scores.shape + # check that the thresholds are probabilities with extreme values close to 0 and 1. + # they are not exactly 0 and 1 because they are the extremum of the + # `estimator.predict_proba(X)` values. 
+    assert 0 <= thresholds.min() <= 0.01
+    assert 0.99 <= thresholds.max() <= 1
+    # balanced accuracy should be between 0.5 and 1 when it is not adjusted
+    assert 0.5 <= scores.min() <= 1
+
+    # check that passing kwargs to the scorer works
+    curve_scorer = _CurveScorer(
+        balanced_accuracy_score,
+        sign=1,
+        response_method="predict_proba",
+        thresholds=10,
+        kwargs={"adjusted": True},
+    )
+    scores, thresholds = curve_scorer(estimator, X, y)
+
+    # balanced accuracy should be between 0 and 0.5 when it is adjusted
+    assert 0 <= scores.min() <= 0.5
+
+    # check that we can invert the sign of the score when dealing with `neg_*` scorer
+    curve_scorer = _CurveScorer(
+        balanced_accuracy_score,
+        sign=-1,
+        response_method="predict_proba",
+        thresholds=10,
+        kwargs={"adjusted": True},
+    )
+    scores, thresholds = curve_scorer(estimator, X, y)
+
+    assert all(scores <= 0)
+
+
+def test_curve_scorer_pos_label(global_random_seed):
+    """Check that the `pos_label` parameter is properly propagated to the scorer."""
+    n_samples = 30
+    X, y = make_classification(
+        n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed
+    )
+    estimator = LogisticRegression().fit(X, y)
+
+    curve_scorer = _CurveScorer(
+        recall_score,
+        sign=1,
+        response_method="predict_proba",
+        thresholds=10,
+        kwargs={"pos_label": 1},
+    )
+    scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y)
+
+    curve_scorer = _CurveScorer(
+        recall_score,
+        sign=1,
+        response_method="predict_proba",
+        thresholds=10,
+        kwargs={"pos_label": 0},
+    )
+    scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y)
+
+    # Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal.
+    assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all()
+    # The min-max range for the thresholds is defined by the probabilities of the
+    # `pos_label` class (the column of `predict_proba`).
+    y_pred = estimator.predict_proba(X)
+    assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0])
+    assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0])
+    assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1])
+    assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1])
+
+    # The recall cannot be negative and `pos_label=1` should have a higher recall
+    # since there are fewer samples to consider.
+ assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min() + assert scores_pos_label_0.max() == pytest.approx(1.0) + assert scores_pos_label_1.max() == pytest.approx(1.0) diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py index 5d49aeb7a5839..c2f58e7337676 100644 --- a/sklearn/model_selection/_classification_threshold.py +++ b/sklearn/model_selection/_classification_threshold.py @@ -15,7 +15,7 @@ check_scoring, get_scorer_names, ) -from ..metrics._classification_threshold import ( +from ..metrics._scorer import ( _CurveScorer, _threshold_scores_to_class_labels, ) diff --git a/sklearn/model_selection/tests/test_classification_threshold.py b/sklearn/model_selection/tests/test_classification_threshold.py index 2f6b49f695f05..a7bc6f86f1248 100644 --- a/sklearn/model_selection/tests/test_classification_threshold.py +++ b/sklearn/model_selection/tests/test_classification_threshold.py @@ -18,7 +18,7 @@ fbeta_score, make_scorer, ) -from sklearn.metrics._classification_threshold import _CurveScorer +from sklearn.metrics._scorer import _CurveScorer from sklearn.model_selection import ( FixedThresholdClassifier, StratifiedShuffleSplit,
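
Usage sketch (illustrative only, not part of the patches): how the relocated
`_CurveScorer` is exercised, mirroring the moved tests. It assumes the state of the
tree after the second commit, i.e. the private import path
`sklearn.metrics._scorer._CurveScorer`; as a private helper it is not part of the
public API and may change.

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import balanced_accuracy_score
    from sklearn.metrics._scorer import _CurveScorer

    X, y = make_classification(random_state=0)
    estimator = LogisticRegression().fit(X, y)

    # Score balanced accuracy at 10 thresholds spread uniformly between the
    # minimum and maximum of the positive-class `predict_proba` values.
    curve_scorer = _CurveScorer(
        balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores, thresholds = curve_scorer(estimator, X, y)
    # `scores[i]` is the balanced accuracy obtained when the positive class is
    # predicted for samples whose probability is >= `thresholds[i]`.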