MNT Refactor scorer using _get_response #18212

Closed
wants to merge 19 commits
7 changes: 7 additions & 0 deletions doc/whats_new/v0.24.rst
@@ -268,6 +268,13 @@ Changelog
class to be used when computing the roc auc statistics.
:pr:`17651` by :user:`Clara Matos <claramatos>`.

- |Fix| Fix scorers that accept a `pos_label` parameter and compute their
metrics from values returned by `decision_function` or `predict_proba`.
Previously, they would return erroneous values when `pos_label` did not
correspond to `classifier.classes_[1]`. This is especially important when
training classifiers directly on string-labeled targets.
:pr:`18114` by :user:`Guillaume Lemaitre <glemaitre>`.
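
For context, a minimal sketch of the scenario this entry describes, assuming a binary problem relabeled with strings so that the class of interest is not `classifier.classes_[1]`; the dataset, relabeling, and choice of metric below are illustrative only, not taken from the PR:

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
# string-labeled target: classes_ is sorted alphabetically, so
# classes_[1] == "not cancer", which is not the class of interest
y = np.array(["cancer", "not cancer"])[y]

clf = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

# a scorer that computes its metric from predict_proba and relies on
# pos_label to pick the right probability column
ap_scorer = make_scorer(average_precision_score, needs_proba=True,
                        pos_label="cancer")
print(ap_scorer(clf, X, y))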

:mod:`sklearn.model_selection`
..............................

146 changes: 145 additions & 1 deletion sklearn/metrics/_base.py
@@ -16,7 +16,9 @@

import numpy as np

from ..utils import check_array, check_consistent_length
from ..base import is_classifier
from ..utils import check_array
from ..utils import check_consistent_length
from ..utils.multiclass import type_of_target


@@ -200,3 +202,145 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
pair_scores[ix] = (a_true_score + b_true_score) / 2

return np.average(pair_scores, weights=prevalence)


def _check_classifier_response_method(estimator, response_method):
"""Return prediction method from the response_method

Parameters
----------
estimator : estimator instance
Classifier to check.

response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies the response method to use: :term:`predict_proba`,
:term:`decision_function` or :term:`predict`. If set to 'auto',
:term:`predict_proba` is tried first; if it does not exist,
:term:`decision_function` is tried next, and :term:`predict` last.

Returns
-------
prediction_method : callable
Prediction method of estimator.
"""

possible_response_methods = (
"predict", "predict_proba", "decision_function", "auto"
)
if response_method not in possible_response_methods:
raise ValueError(
f"response_method must be one of "
f"{','.join(possible_response_methods)}."
)

error_msg = "response method {} is not defined in {}"
if response_method != "auto":
prediction_method = getattr(estimator, response_method, None)
if prediction_method is None:
raise ValueError(
error_msg.format(response_method, estimator.__class__.__name__)
)
else:
predict_proba = getattr(estimator, 'predict_proba', None)
decision_function = getattr(estimator, 'decision_function', None)
predict = getattr(estimator, 'predict', None)
prediction_method = predict_proba or decision_function or predict
if prediction_method is None:
raise ValueError(
error_msg.format(
"decision_function, predict_proba or predict",
estimator.__class__.__name__
)
)

return prediction_method


def _get_response(
estimator,
X,
y_true,
response_method,
pos_label=None,
support_multi_class=False,
):
"""Return response and positive label.

Parameters
----------
estimator : estimator instance
Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
in which the last estimator is a classifier.

X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input values.

y_true : array-like of shape (n_samples,)
The true labels.

response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies the response method to use: :term:`predict_proba`,
:term:`decision_function` or :term:`predict`. If set to 'auto',
:term:`predict_proba` is tried first; if it does not exist,
:term:`decision_function` is tried next, and :term:`predict` last.

pos_label : str or int, default=None
The class considered as the positive class when computing
the metrics. By default, `estimator.classes_[1]` is
considered as the positive class.

support_multi_class : bool, default=False
...

Returns
-------
y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)
Target scores calculated from the provided response_method
and pos_label.

pos_label : str or int
The class considered as the positive class when computing
the metrics.
"""
if is_classifier(estimator):
y_type = type_of_target(y_true)
classes = estimator.classes_
prediction_method = _check_classifier_response_method(
estimator, response_method
)
y_pred = prediction_method(X)

if pos_label is not None and pos_label not in classes:
raise ValueError(
f"The class provided by 'pos_label' is unknown. Got "
f"{pos_label} instead of one of {classes}."
)

if prediction_method.__name__ == "predict_proba":
if y_type == "binary":
pos_label = pos_label if pos_label is not None else classes[-1]
if y_pred.shape[1] == 2:
col_idx = np.flatnonzero(classes == pos_label)[0]
y_pred = y_pred[:, col_idx]
else:
err_msg = (
f"Got predict_proba of shape {y_pred.shape}, but need "
f"classifier with two classes"
)
if support_multi_class and y_pred.shape[1] == 1:
raise ValueError(err_msg)
elif not support_multi_class:
raise ValueError(err_msg)
elif prediction_method.__name__ == "decision_function":
if y_type == "binary":
pos_label = pos_label if pos_label is not None else classes[-1]
if pos_label == classes[0]:
y_pred *= -1
else:
if response_method not in ("predict", "auto"):
raise ValueError(
f"{estimator.__class__.__name__} should be a classifier"
)
y_pred, pos_label = estimator.predict(X), None

return y_pred, pos_label
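
As a usage sketch (illustrative, not part of the diff), this is roughly how the new private helper behaves for a classifier without `predict_proba`: the 'auto' fallback resolves to `decision_function`, and the scores are negated when `pos_label` is `classes_[0]`. The dataset and estimator choice are assumptions; only `_get_response` and its import path come from the diff above:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.metrics._base import _get_response

X, y = make_classification(n_classes=2, random_state=0)
y = np.array(["neg", "pos"])[y]
clf = LinearSVC(random_state=0).fit(X, y)

# LinearSVC has no predict_proba, so 'auto' falls back to decision_function
y_score, pos_label = _get_response(clf, X, y, response_method="auto",
                                   pos_label="neg")
# pos_label == "neg" is classes_[0], so the decision values were negated
np.testing.assert_allclose(y_score, -clf.decision_function(X))
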
2 changes: 1 addition & 1 deletion sklearn/metrics/_classification.py
@@ -1252,7 +1252,7 @@ def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label):
str(average_options))

y_type, y_true, y_pred = _check_targets(y_true, y_pred)
present_labels = unique_labels(y_true, y_pred)
present_labels = unique_labels(y_true, y_pred).tolist()
if average == 'binary':
if y_type == 'binary':
if pos_label not in present_labels:
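
A small aside on the `.tolist()` change above (presumably its motivation; the snippet below is an illustrative assumption, not from the PR): membership tests such as `pos_label not in present_labels` rely on elementwise comparison when run against a NumPy array, which can warn when the types are not comparable, whereas a plain Python list compares element by element without complaint:

import numpy as np

present_labels = np.array([0, 1])
# on the ndarray, `in` triggers an elementwise ==; with an incomparable
# type such as a string pos_label this may emit a FutureWarning
"yes" in present_labels
# on a plain list the comparison is done in Python and simply returns False
"yes" in present_labels.tolist()
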
114 changes: 0 additions & 114 deletions sklearn/metrics/_plot/base.py

This file was deleted.

2 changes: 1 addition & 1 deletion sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,4 +1,4 @@
from .base import _get_response
from .._base import _get_response

from .. import average_precision_score
from .. import precision_recall_curve
2 changes: 1 addition & 1 deletion sklearn/metrics/_plot/roc_curve.py
@@ -1,4 +1,4 @@
from .base import _get_response
from .._base import _get_response

from .. import auc
from .. import roc_curve