MNT Moving _CurveScorer from model_selection to metrics #29216

Merged (4 commits) on Jul 25, 2024
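
This pull request moves the private `_CurveScorer` class and the `_threshold_scores_to_class_labels` helper from `sklearn/model_selection/_classification_threshold.py` into `sklearn/metrics/_scorer.py`, and adds dedicated tests for `_CurveScorer` (including `pos_label` handling) to `sklearn/metrics/tests/test_score_objects.py`. The diff below shows the changes from all commits.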
120 changes: 120 additions & 0 deletions sklearn/metrics/_scorer.py
@@ -23,8 +23,11 @@
from collections import Counter
from functools import partial
from inspect import signature
from numbers import Integral
from traceback import format_exc

import numpy as np

from ..base import is_regressor
from ..utils import Bunch
from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params
@@ -1064,3 +1067,120 @@ def check_scoring(estimator=None, scoring=None, *, allow_none=False, raise_exc=T
"If no scoring is specified, the estimator passed should "
"have a 'score' method. The estimator %r does not." % estimator
)


def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label):
"""Threshold `y_score` and return the associated class labels."""
if pos_label is None:
map_thresholded_score_to_label = np.array([0, 1])
else:
pos_label_idx = np.flatnonzero(classes == pos_label)[0]
neg_label_idx = np.flatnonzero(classes != pos_label)[0]
map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx])

return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]]


class _CurveScorer(_BaseScorer):
"""Scorer that takes a continuous response and outputs a score for each threshold.

Parameters
----------
score_func : callable
The score function to use. It will be called as
`score_func(y_true, y_pred, **kwargs)`.

sign : int
Either 1 or -1 to return the score as `sign * score_func(estimator, X, y)`.
Thus, `sign` defines whether higher scores are better or worse.

kwargs : dict
Additional parameters to pass to the score function.

thresholds : int or array-like
Related to the number of decision thresholds for which we want to compute the
score. If an integer, it will be used to generate `thresholds` thresholds
uniformly distributed between the minimum and maximum predicted scores. If an
array-like, it will be used as the thresholds.

response_method : str
The method to call on the estimator to get the response values.
"""

def __init__(self, score_func, sign, kwargs, thresholds, response_method):
super().__init__(
score_func=score_func,
sign=sign,
kwargs=kwargs,
response_method=response_method,
)
self._thresholds = thresholds

@classmethod
def from_scorer(cls, scorer, response_method, thresholds):
"""Create a continuous scorer from a normal scorer."""
instance = cls(
score_func=scorer._score_func,
sign=scorer._sign,
response_method=response_method,
thresholds=thresholds,
kwargs=scorer._kwargs,
)
# transfer the metadata request
instance._metadata_request = scorer._get_metadata_request()
return instance

def _score(self, method_caller, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
----------
method_caller : callable
Returns predictions given an estimator, method name, and other
arguments, potentially caching results.

estimator : object
Trained estimator to use for scoring.

X : {array-like, sparse matrix} of shape (n_samples, n_features)
Test data that will be fed to estimator.predict.

y_true : array-like of shape (n_samples,)
Gold standard target values for X.

**kwargs : dict
Other parameters passed to the scorer. Refer to
:func:`set_score_request` for more details.

Returns
-------
scores : ndarray of shape (thresholds,)
The scores associated with each threshold.

potential_thresholds : ndarray of shape (thresholds,)
The potential thresholds used to compute the scores.
"""
pos_label = self._get_pos_label()
y_score = method_caller(
estimator, self._response_method, X, pos_label=pos_label
)

scoring_kwargs = {**self._kwargs, **kwargs}
if isinstance(self._thresholds, Integral):
potential_thresholds = np.linspace(
np.min(y_score), np.max(y_score), self._thresholds
)
else:
potential_thresholds = np.asarray(self._thresholds)
score_thresholds = [
self._sign
* self._score_func(
y_true,
_threshold_scores_to_class_labels(
y_score, th, estimator.classes_, pos_label
),
**scoring_kwargs,
)
for th in potential_thresholds
]
return np.array(score_thresholds), potential_thresholds
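
For readers unfamiliar with these private helpers, the following is a minimal sketch of how the moved code behaves. It assumes a scikit-learn build that already includes this change (the import path `sklearn.metrics._scorer` is exactly the destination of the move above); the score values are made up for illustration.

import numpy as np

from sklearn.metrics._scorer import _threshold_scores_to_class_labels

classes = np.array(["neg", "pos"])
y_score = np.array([0.1, 0.4, 0.6, 0.9])  # e.g. predict_proba(X)[:, 1]

# Scores greater than or equal to the threshold are mapped to `pos_label`,
# the remaining samples to the other class.
print(_threshold_scores_to_class_labels(y_score, 0.5, classes, pos_label="pos"))
# ['neg' 'neg' 'pos' 'pos']

# When `thresholds` is an integer, `_CurveScorer._score` evaluates the metric on a
# uniform grid between the minimum and maximum predicted scores, as in:
print(np.linspace(y_score.min(), y_score.max(), num=5))
# [0.1 0.3 0.5 0.7 0.9]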
92 changes: 92 additions & 0 deletions sklearn/metrics/tests/test_score_objects.py
@@ -43,6 +43,7 @@
from sklearn.metrics import cluster as cluster_module
from sklearn.metrics._scorer import (
_check_multimetric_scoring,
_CurveScorer,
_MultimetricScorer,
_PassthroughScorer,
_Scorer,
@@ -1598,3 +1599,94 @@ def test_metadata_routing_multimetric_metadata_routing(enable_metadata_routing):
multimetric_scorer = _MultimetricScorer(scorers={"acc": get_scorer("accuracy")})
with config_context(enable_metadata_routing=enable_metadata_routing):
multimetric_scorer(estimator, X, y)


def test_curve_scorer():
"""Check the behaviour of the `_CurveScorer` class."""
X, y = make_classification(random_state=0)
estimator = LogisticRegression().fit(X, y)
curve_scorer = _CurveScorer(
balanced_accuracy_score,
sign=1,
response_method="predict_proba",
thresholds=10,
kwargs={},
)
scores, thresholds = curve_scorer(estimator, X, y)

assert thresholds.shape == scores.shape
# check that the thresholds are probabilities with extreme values close to 0 and 1.
# they are not exactly 0 and 1 because they are the extrema of the
# `estimator.predict_proba(X)` values.
assert 0 <= thresholds.min() <= 0.01
assert 0.99 <= thresholds.max() <= 1
# balanced accuracy should be between 0.5 and 1 when it is not adjusted
assert 0.5 <= scores.min() <= 1

# check that passing kwargs to the scorer works
curve_scorer = _CurveScorer(
balanced_accuracy_score,
sign=1,
response_method="predict_proba",
thresholds=10,
kwargs={"adjusted": True},
)
scores, thresholds = curve_scorer(estimator, X, y)

# the adjusted balanced accuracy is chance-corrected, so its minimum should lie
# between 0 and 0.5
assert 0 <= scores.min() <= 0.5

# check that we can invert the sign of the score, as done for `neg_*` scorers
curve_scorer = _CurveScorer(
balanced_accuracy_score,
sign=-1,
response_method="predict_proba",
thresholds=10,
kwargs={"adjusted": True},
)
scores, thresholds = curve_scorer(estimator, X, y)

assert all(scores <= 0)


def test_curve_scorer_pos_label(global_random_seed):
"""Check that the `pos_label` parameter is properly propagated to the scorer."""
n_samples = 30
X, y = make_classification(
n_samples=n_samples, weights=[0.9, 0.1], random_state=global_random_seed
)
estimator = LogisticRegression().fit(X, y)

curve_scorer = _CurveScorer(
recall_score,
sign=1,
response_method="predict_proba",
thresholds=10,
kwargs={"pos_label": 1},
)
scores_pos_label_1, thresholds_pos_label_1 = curve_scorer(estimator, X, y)

curve_scorer = _CurveScorer(
recall_score,
sign=1,
response_method="predict_proba",
thresholds=10,
kwargs={"pos_label": 0},
)
scores_pos_label_0, thresholds_pos_label_0 = curve_scorer(estimator, X, y)

# Since `pos_label` is forwarded to the curve_scorer, the thresholds are not equal.
assert not (thresholds_pos_label_1 == thresholds_pos_label_0).all()
# The min-max range for the thresholds is defined by the probabilities of the
# `pos_label` class (the column of `predict_proba`).
y_pred = estimator.predict_proba(X)
assert thresholds_pos_label_0.min() == pytest.approx(y_pred.min(axis=0)[0])
assert thresholds_pos_label_0.max() == pytest.approx(y_pred.max(axis=0)[0])
assert thresholds_pos_label_1.min() == pytest.approx(y_pred.min(axis=0)[1])
assert thresholds_pos_label_1.max() == pytest.approx(y_pred.max(axis=0)[1])

# The recall cannot be negative and `pos_label=1` should have a higher recall
# since there are fewer samples to be considered.
assert 0.0 < scores_pos_label_0.min() < scores_pos_label_1.min()
assert scores_pos_label_0.max() == pytest.approx(1.0)
assert scores_pos_label_1.max() == pytest.approx(1.0)
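
As a side note on the `pos_label` test above: for a binary classifier the two columns of `predict_proba` are complementary, so the threshold grid built from the minimum and maximum of the selected column necessarily differs between `pos_label=0` and `pos_label=1`. A small sketch with made-up probabilities illustrates this.

import numpy as np

# Hypothetical predict_proba output; each row sums to 1 (columns: class 0, class 1).
proba = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])

grid_pos_0 = np.linspace(proba[:, 0].min(), proba[:, 0].max(), num=10)
grid_pos_1 = np.linspace(proba[:, 1].min(), proba[:, 1].max(), num=10)

# The grids span [0.3, 0.8] and [0.2, 0.7] respectively, hence are not equal.
assert not np.allclose(grid_pos_0, grid_pos_1)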
122 changes: 4 additions & 118 deletions sklearn/model_selection/_classification_threshold.py
@@ -15,7 +15,10 @@
check_scoring,
get_scorer_names,
)
from ..metrics._scorer import _BaseScorer
from ..metrics._scorer import (
_CurveScorer,
_threshold_scores_to_class_labels,
)
from ..utils import _safe_indexing
from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions
from ..utils._response import _get_response_values_binary
@@ -54,18 +57,6 @@ def check(self):
return check


def _threshold_scores_to_class_labels(y_score, threshold, classes, pos_label):
"""Threshold `y_score` and return the associated class labels."""
if pos_label is None:
map_thresholded_score_to_label = np.array([0, 1])
else:
pos_label_idx = np.flatnonzero(classes == pos_label)[0]
neg_label_idx = np.flatnonzero(classes != pos_label)[0]
map_thresholded_score_to_label = np.array([neg_label_idx, pos_label_idx])

return classes[map_thresholded_score_to_label[(y_score >= threshold).astype(int)]]


class BaseThresholdClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
"""Base class for binary classifiers that set a non-default decision threshold.

@@ -426,111 +417,6 @@ def get_metadata_routing(self):
return router


class _CurveScorer(_BaseScorer):
"""Scorer that takes a continuous response and outputs a score for each threshold.

Parameters
----------
score_func : callable
The score function to use. It will be called as
`score_func(y_true, y_pred, **kwargs)`.

sign : int
Either 1 or -1 to return the score as `sign * score_func(estimator, X, y)`.
Thus, `sign` defines whether higher scores are better or worse.

kwargs : dict
Additional parameters to pass to the score function.

thresholds : int or array-like
Related to the number of decision thresholds for which we want to compute the
score. If an integer, it will be used to generate `thresholds` thresholds
uniformly distributed between the minimum and maximum predicted scores. If an
array-like, it will be used as the thresholds.

response_method : str
The method to call on the estimator to get the response values.
"""

def __init__(self, score_func, sign, kwargs, thresholds, response_method):
super().__init__(
score_func=score_func,
sign=sign,
kwargs=kwargs,
response_method=response_method,
)
self._thresholds = thresholds

@classmethod
def from_scorer(cls, scorer, response_method, thresholds):
"""Create a continuous scorer from a normal scorer."""
instance = cls(
score_func=scorer._score_func,
sign=scorer._sign,
response_method=response_method,
thresholds=thresholds,
kwargs=scorer._kwargs,
)
# transfer the metadata request
instance._metadata_request = scorer._get_metadata_request()
return instance

def _score(self, method_caller, estimator, X, y_true, **kwargs):
"""Evaluate predicted target values for X relative to y_true.

Parameters
----------
method_caller : callable
Returns predictions given an estimator, method name, and other
arguments, potentially caching results.

estimator : object
Trained estimator to use for scoring.

X : {array-like, sparse matrix} of shape (n_samples, n_features)
Test data that will be fed to estimator.predict.

y_true : array-like of shape (n_samples,)
Gold standard target values for X.

**kwargs : dict
Other parameters passed to the scorer. Refer to
:func:`set_score_request` for more details.

Returns
-------
scores : ndarray of shape (thresholds,)
The scores associated with each threshold.

potential_thresholds : ndarray of shape (thresholds,)
The potential thresholds used to compute the scores.
"""
pos_label = self._get_pos_label()
y_score = method_caller(
estimator, self._response_method, X, pos_label=pos_label
)

scoring_kwargs = {**self._kwargs, **kwargs}
if isinstance(self._thresholds, Integral):
potential_thresholds = np.linspace(
np.min(y_score), np.max(y_score), self._thresholds
)
else:
potential_thresholds = np.asarray(self._thresholds)
score_thresholds = [
self._sign
* self._score_func(
y_true,
_threshold_scores_to_class_labels(
y_score, th, estimator.classes_, pos_label
),
**scoring_kwargs,
)
for th in potential_thresholds
]
return np.array(score_thresholds), potential_thresholds


def _fit_and_score_over_thresholds(
classifier,
X,