Revert "ENH add a parameter pos_label in roc_auc_score (#17594)" #17703

Merged

doc/whats_new/v0.24.rst (0 additions & 3 deletions)

@@ -122,9 +122,6 @@ Changelog
   class to be used when computing the precision and recall statistics.
   :pr:`17569` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- |Enhancement| Add `pos_label` parameter to :func:`roc_auc_score`.
-  :pr:`17594` by :user:`Guillaume Lemaitre <glemaitre>`.
-
 :mod:`sklearn.model_selection`
 ..............................
 
sklearn/metrics/_ranking.py (7 additions & 16 deletions)

@@ -218,16 +218,14 @@ def _binary_uninterpolated_average_precision(
                                  average, sample_weight=sample_weight)
 
 
-def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
-                          pos_label=None):
+def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
     """Binary roc auc score"""
     if len(np.unique(y_true)) != 2:
         raise ValueError("Only one class present in y_true. ROC AUC score "
                          "is not defined in that case.")
 
-    fpr, tpr, _ = roc_curve(
-        y_true, y_score, sample_weight=sample_weight, pos_label=pos_label,
-    )
+    fpr, tpr, _ = roc_curve(y_true, y_score,
+                            sample_weight=sample_weight)
     if max_fpr is None or max_fpr == 1:
         return auc(fpr, tpr)
     if max_fpr <= 0 or max_fpr > 1:
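
After this hunk, `_binary_roc_auc_score` once again calls `roc_curve` without forwarding a `pos_label`, so it relies on the caller to hand it `y_true` already encoded with the positive class as 1. A minimal standalone sketch of the equivalent computation for `max_fpr=None` (the data is illustrative, borrowed from the `roc_curve` docstring example):

    import numpy as np
    from sklearn.metrics import auc, roc_curve

    # Illustrative inputs: y_true is already encoded so that the positive
    # class is 1, which is what the reverted helper expects from its caller.
    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])

    # Equivalent of the reverted _binary_roc_auc_score body when max_fpr=None.
    fpr, tpr, _ = roc_curve(y_true, y_score)
    print(auc(fpr, tpr))  # 0.75
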
@@ -250,8 +248,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
 
 @_deprecate_positional_args
 def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
-                  max_fpr=None, multi_class="raise", labels=None,
-                  pos_label=None):
+                  max_fpr=None, multi_class="raise", labels=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
     from prediction scores.
 
@@ -330,13 +327,6 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         If ``None``, the numerical or lexicographical order of the labels in
         ``y_true`` is used.
 
-    pos_label : int or str, default=None
-        The label of the positive class in the binary case. When
-        `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
-        set to 1, otherwise an error will be raised.
-
-        .. versionadded:: 0.24
-
     Returns
     -------
     auc : float
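
The removed docstring noted that with `pos_label=None`, `y_true` in {-1, 1} or {0, 1} resolved to a positive class of 1. Those are exactly the encodings the reverted code keeps handling implicitly, so for them the revert is behavior-preserving. A quick sketch of that unchanged common case (data is illustrative):

    import numpy as np
    from sklearn.metrics import roc_auc_score

    # {-1, 1} encoded labels: with or without the reverted pos_label
    # parameter, the positive class here resolves to 1.
    y_true = np.array([-1, -1, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])
    print(roc_auc_score(y_true, y_score))  # 0.75
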
@@ -398,9 +388,10 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         return _multiclass_roc_auc_score(y_true, y_score, labels,
                                          multi_class, average, sample_weight)
     elif y_type == "binary":
+        labels = np.unique(y_true)
+        y_true = label_binarize(y_true, classes=labels)[:, 0]
         return _average_binary_score(partial(_binary_roc_auc_score,
-                                             max_fpr=max_fpr,
-                                             pos_label=pos_label),
+                                             max_fpr=max_fpr),
                                      y_true, y_score, average,
                                      sample_weight=sample_weight)
     else:  # multilabel-indicator
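
The restored binary branch re-encodes `y_true` before delegating: `np.unique` sorts the two labels and, for two classes, `label_binarize` marks membership in the second (greater) one, so the positive class is implicitly the larger label. That implicit convention is what `pos_label` was meant to override. A small sketch with hypothetical string labels:

    import numpy as np
    from sklearn.preprocessing import label_binarize

    # Hypothetical labels: np.unique sorts them, so "not cancer" (the
    # lexicographically greater label) becomes the positive class.
    y_true = np.array(["cancer", "not cancer", "cancer", "not cancer"])
    labels = np.unique(y_true)                        # ['cancer' 'not cancer']
    encoded = label_binarize(y_true, classes=labels)[:, 0]
    print(encoded)                                    # [0 1 0 1]
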
sklearn/metrics/tests/test_common.py (0 additions & 11 deletions)

@@ -319,17 +319,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
 # Metrics with a "pos_label" argument
 METRICS_WITH_POS_LABEL = {
     "roc_curve",
-
-    "roc_auc_score",
-    "weighted_roc_auc",
-    "samples_roc_auc",
-    "micro_roc_auc",
-    "ovr_roc_auc",
-    "weighted_ovr_roc_auc",
-    "ovo_roc_auc",
-    "weighted_ovo_roc_auc",
-    "partial_roc_auc",
-
     "precision_recall_curve",
 
     "brier_score_loss",

sklearn/metrics/tests/test_ranking.py (1 addition & 42 deletions)

Expand Up @@ -7,13 +7,9 @@
from sklearn import datasets
from sklearn import svm

from sklearn.utils.extmath import softmax
from sklearn.datasets import make_multilabel_classification
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.random_projection import _sparse_random_matrix
from sklearn.utils import shuffle
from sklearn.utils.extmath import softmax
from sklearn.utils.validation import check_array, check_consistent_length
from sklearn.utils.validation import check_random_state

@@ -1473,40 +1469,3 @@ def test_partial_roc_auc_score():
     assert_almost_equal(
         roc_auc_score(y_true, y_pred, max_fpr=max_fpr),
         _partial_roc_auc_score(y_true, y_pred, max_fpr))
-
-
-@pytest.mark.parametrize(
-    "decision_method", ["predict_proba", "decision_function"]
-)
-def test_roc_auc_score_pos_label(decision_method):
-    X, y = load_breast_cancer(return_X_y=True)
-    # create a highly imbalanced dataset
-    idx_positive = np.flatnonzero(y == 1)
-    idx_negative = np.flatnonzero(y == 0)
-    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
-    X, y = X[idx_selected], y[idx_selected]
-    X, y = shuffle(X, y, random_state=42)
-    # only use 2 features to make the problem even harder
-    X = X[:, :2]
-    y = np.array(
-        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
-    )
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, stratify=y, random_state=0,
-    )
-
-    classifier = LogisticRegression()
-    classifier.fit(X_train, y_train)
-
-    # sanity check to be sure the positive class is classes_[0] and that we
-    # would be betrayed by the class imbalance if we relied on the default
-    assert classifier.classes_.tolist() == ["cancer", "not cancer"]
-    pos_label = "cancer"
-
-    y_pred = getattr(classifier, decision_method)(X_test)
-    y_pred = y_pred[:, 0] if y_pred.ndim == 2 else -y_pred
-
-    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=pos_label)
-    roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
-
-    assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
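
With both the parameter and `test_roc_auc_score_pos_label` gone, a `pos_label`-aware AUC can still be computed by pairing `roc_curve` (which keeps its `pos_label` argument) with `auc`. A hedged sketch mirroring the deleted test's setup, with toy labels and scores in place of the breast-cancer data:

    import numpy as np
    from sklearn.metrics import auc, roc_curve

    # Toy stand-ins for the deleted test's data: "cancer" is the intended
    # positive class and higher scores are assumed to mean "more cancer".
    y_test = np.array(["cancer", "not cancer", "cancer", "not cancer"])
    y_score = np.array([0.8, 0.3, 0.6, 0.4])

    # roc_curve keeps its pos_label argument after this revert, so the
    # pos_label-aware AUC remains one extra line away.
    fpr, tpr, _ = roc_curve(y_test, y_score, pos_label="cancer")
    print(auc(fpr, tpr))  # 1.0 for these toy scores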