MNT Refactor scorer using _get_response #18212

Closed
wants to merge 19 commits
7 changes: 7 additions & 0 deletions doc/whats_new/v0.24.rst
@@ -268,6 +268,13 @@ Changelog
class to be used when computing the roc auc statistics.
:pr:`17651` by :user:`Clara Matos <claramatos>`.

- |Fix| Fix scorers that accept a `pos_label` parameter and compute their
metrics from values returned by `decision_function` or `predict_proba`.
Previously, they would return erroneous values when `pos_label` did not
correspond to `classifier.classes_[1]`. This is especially important when
training classifiers directly on string-labeled targets.
:pr:`18114` by :user:`Guillaume Lemaitre <glemaitre>`.
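
For context, a minimal sketch of the scenario this entry describes, assuming a binary problem relabeled with strings so that the class of interest is not `classifier.classes_[1]`; the dataset, relabeling, and choice of metric below are illustrative only, not taken from the PR:

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True)
# string-labeled target: classes_ is sorted alphabetically, so
# classes_[1] == "not cancer", which is not the class of interest
y = np.array(["cancer", "not cancer"])[y]

clf = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

# a scorer that computes its metric from predict_proba and relies on
# pos_label to pick the right probability column
ap_scorer = make_scorer(average_precision_score, needs_proba=True,
                        pos_label="cancer")
print(ap_scorer(clf, X, y))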

:mod:`sklearn.model_selection`
..............................

146 changes: 145 additions & 1 deletion sklearn/metrics/_base.py
@@ -16,7 +16,9 @@

import numpy as np

from ..utils import check_array, check_consistent_length
from ..base import is_classifier
from ..utils import check_array
from ..utils import check_consistent_length
from ..utils.multiclass import type_of_target


@@ -200,3 +202,145 @@ def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
pair_scores[ix] = (a_true_score + b_true_score) / 2

return np.average(pair_scores, weights=prevalence)


def _check_classifier_response_method(estimator, response_method):
"""Return prediction method from the response_method

Parameters
----------
estimator : estimator instance
Classifier to check.

response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies the response method to use: :term:`predict_proba`,
:term:`decision_function` or :term:`predict`. If set to 'auto',
:term:`predict_proba` is tried first; if it does not exist,
:term:`decision_function` is tried next, and :term:`predict` last.

Returns
-------
prediction_method : callable
Prediction method of estimator.
"""

possible_response_methods = (
"predict", "predict_proba", "decision_function", "auto"
)
if response_method not in possible_response_methods:
raise ValueError(
f"response_method must be one of "
f"{','.join(possible_response_methods)}."
)

error_msg = "response method {} is not defined in {}"
if response_method != "auto":
prediction_method = getattr(estimator, response_method, None)
if prediction_method is None:
raise ValueError(
error_msg.format(response_method, estimator.__class__.__name__)
)
else:
predict_proba = getattr(estimator, 'predict_proba', None)
decision_function = getattr(estimator, 'decision_function', None)
predict = getattr(estimator, 'predict', None)
prediction_method = predict_proba or decision_function or predict
if prediction_method is None:
raise ValueError(
error_msg.format(
"decision_function, predict_proba or predict",
estimator.__class__.__name__
)
)

return prediction_method


def _get_response(
estimator,
X,
y_true,
response_method,
pos_label=None,
support_multi_class=False,
):
"""Return response and positive label.

Parameters
----------
estimator : estimator instance
Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
in which the last estimator is a classifier.

X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input values.

y_true : array-like of shape (n_samples,)
The true labels.

response_method : {'auto', 'predict_proba', 'decision_function', 'predict'}
Specifies the response method to use: :term:`predict_proba`,
:term:`decision_function` or :term:`predict`. If set to 'auto',
:term:`predict_proba` is tried first; if it does not exist,
:term:`decision_function` is tried next, and :term:`predict` last.

pos_label : str or int, default=None
The class considered as the positive class when computing
the metrics. By default, `estimator.classes_[1]` is
considered as the positive class.

support_multi_class : bool, default=False
...

Returns
-------
y_pred : ndarray of shape (n_samples,) or (n_samples, n_classes)
Target scores calculated from the provided response_method
and pos_label.

pos_label : str or int
The class considered as the positive class when computing
the metrics.
"""
if is_classifier(estimator):
y_type = type_of_target(y_true)
classes = estimator.classes_
prediction_method = _check_classifier_response_method(
estimator, response_method
)
y_pred = prediction_method(X)

if pos_label is not None and pos_label not in classes:
raise ValueError(
f"The class provided by 'pos_label' is unknown. Got "
f"{pos_label} instead of one of {classes}."
)

if prediction_method.__name__ == "predict_proba":
if y_type == "binary":
pos_label = pos_label if pos_label is not None else classes[-1]
if y_pred.shape[1] == 2:
col_idx = np.flatnonzero(classes == pos_label)[0]
y_pred = y_pred[:, col_idx]
else:
err_msg = (
f"Got predict_proba of shape {y_pred.shape}, but need "
f"classifier with two classes"
)
if support_multi_class and y_pred.shape[1] == 1:
raise ValueError(err_msg)
elif not support_multi_class:
raise ValueError(err_msg)
elif prediction_method.__name__ == "decision_function":
if y_type == "binary":
pos_label = pos_label if pos_label is not None else classes[-1]
if pos_label == classes[0]:
y_pred *= -1
else:
if response_method not in ("predict", "auto"):
raise ValueError(
f"{estimator.__class__.__name__} should be a classifier"
)
y_pred, pos_label = estimator.predict(X), None

return y_pred, pos_label
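
As a usage sketch (illustrative, not part of the diff), this is roughly how the new private helper behaves for a classifier without `predict_proba`: the 'auto' fallback resolves to `decision_function`, and the scores are negated when `pos_label` is `classes_[0]`. The dataset and estimator choice are assumptions; only `_get_response` and its import path come from the diff above:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.metrics._base import _get_response

X, y = make_classification(n_classes=2, random_state=0)
y = np.array(["neg", "pos"])[y]
clf = LinearSVC(random_state=0).fit(X, y)

# LinearSVC has no predict_proba, so 'auto' falls back to decision_function
y_score, pos_label = _get_response(clf, X, y, response_method="auto",
                                   pos_label="neg")
# pos_label == "neg" is classes_[0], so the decision values were negated
np.testing.assert_allclose(y_score, -clf.decision_function(X))
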
2 changes: 1 addition & 1 deletion sklearn/metrics/_classification.py
@@ -1252,7 +1252,7 @@ def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label):
str(average_options))

y_type, y_true, y_pred = _check_targets(y_true, y_pred)
present_labels = unique_labels(y_true, y_pred)
present_labels = unique_labels(y_true, y_pred).tolist()
if average == 'binary':
if y_type == 'binary':
if pos_label not in present_labels:
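
A small aside on the `.tolist()` change above (presumably its motivation; the snippet below is an illustrative assumption, not from the PR): membership tests such as `pos_label not in present_labels` rely on elementwise comparison when run against a NumPy array, which can warn when the types are not comparable, whereas a plain Python list compares element by element without complaint:

import numpy as np

present_labels = np.array([0, 1])
# on the ndarray, `in` triggers an elementwise ==; with an incomparable
# type such as a string pos_label this may emit a FutureWarning
"yes" in present_labels
# on a plain list the comparison is done in Python and simply returns False
"yes" in present_labels.tolist()
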
114 changes: 0 additions & 114 deletions sklearn/metrics/_plot/base.py

This file was deleted.

2 changes: 1 addition & 1 deletion sklearn/metrics/_plot/precision_recall_curve.py
@@ -1,4 +1,4 @@
from .base import _get_response
from .._base import _get_response

from .. import average_precision_score
from .. import precision_recall_curve
2 changes: 1 addition & 1 deletion sklearn/metrics/_plot/roc_curve.py
@@ -1,4 +1,4 @@
from .base import _get_response
from .._base import _get_response

from .. import auc
from .. import roc_curve